网格计算

开发平台：
Java

hod.py：源码内容
#Licensed to the Apache Software Foundation (ASF) under one
#or more contributor license agreements.  See the NOTICE file
#distributed with this work for additional information
#regarding copyright ownership.  The ASF licenses this file
#to you under the Apache License, Version 2.0 (the
#"License"); you may not use this file except in compliance
#with the License.  You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# -*- python -*-
import sys, os, getpass, pprint, re, cPickle, random, shutil, time, errno
import hodlib.Common.logger
from hodlib.ServiceRegistry.serviceRegistry import svcrgy
from hodlib.Common.xmlrpc import hodXRClient
from hodlib.Common.util import to_http_url, get_exception_string
from hodlib.Common.util import get_exception_error_string
from hodlib.Common.util import hodInterrupt, HodInterruptException
from hodlib.Common.util import HOD_INTERRUPTED_CODE
from hodlib.Common.nodepoolutil import NodePoolUtil
from hodlib.Hod.hadoop import hadoopCluster, hadoopScript
# Id (file name without the ".state" extension) of the per-user state file
# that maps cluster directories to cluster ids.
CLUSTER_DATA_FILE = 'clusters'

# Messages logged when the user state file cannot be accessed. Each takes
# the state file path as its single %s argument. As used in this file:
#   [0] file not readable, [1] file not writable,
#   [2] file not readable and writable, [3] file not writable after a
#   cluster has already been deallocated.
INVALID_STATE_FILE_MSGS = [
                "Requested operation cannot be performed. Cannot read %s: " +
                "Permission denied.",
                "Requested operation cannot be performed. " +
                "Cannot write to %s: Permission denied.",
                "Requested operation cannot be performed. " +
                "Cannot read/write to %s: Permission denied.",
                "Cannot update %s: Permission denied. " +
                "Cluster is deallocated, but info and list " +
                "operations might show incorrect information.",
              ]
class hodState:
  """Pickle-backed state kept as ``<store>/<id>.state`` files.

  The store directory is created on construction if it is missing. Most
  methods take an optional ``id``; when it is omitted the state file is
  looked up by scanning the store for a ``.state`` file.
  """

  # Extension carried by every state file in the store.
  __STORE_EXT = ".state"

  def __init__(self, store):
    """Remember the store directory and create it when absent."""
    self.__store = store
    self.__stateFile = None
    self.__init_store()

  def __init_store(self):
    # os.mkdir creates one level only, so the parent must already exist.
    if not os.path.exists(self.__store):
      os.mkdir(self.__store)

  def __state_file_names(self):
    """Return the names (not paths) of the state files in the store."""
    return [item for item in os.listdir(self.__store)
            if item.endswith(self.__STORE_EXT)]

  def __set_state_file(self, id=None):
    """Point the current state file at ``id``, or at a file found in the store.

    Without an id the last state file yielded by the directory listing is
    selected; if the store holds none, the previous selection is kept.
    """
    if id:
      self.__stateFile = os.path.join(self.__store, "%s%s" % (id,
                                      self.__STORE_EXT))
    else:
      for item in self.__state_file_names():
        self.__stateFile = os.path.join(self.__store, item)

  def get_state_file(self):
    """Return the path of the most recently selected state file, or None."""
    return self.__stateFile

  def checkStateFile(self, id=None, modes=(os.R_OK,)):
    """Return True if the state file is accessible in every mode of ``modes``.

    A state file that does not exist (or cannot be resolved at all) counts
    as accessible, because HOD can create it and will then have permission
    to read and/or write it.
    """
    self.__set_state_file(id)
    if self.__stateFile is None:
      # No id given and no state file in the store: nothing to deny access to.
      return True
    try:
      os.stat(self.__stateFile)
    except OSError as err:
      if err.errno == errno.ENOENT: # error 2 (no such file)
        return True
      # Any other stat failure falls through to the access checks below.
    for mode in modes:
      if not os.access(self.__stateFile, mode):
        return False
    return True

  def read(self, id=None):
    """Return the unpickled content of the state file, or {} if unavailable.

    An empty (truncated) state file also yields {}. State files are
    unpickled as-is, so they must only ever be files written by HOD itself.
    """
    info = {}

    self.__set_state_file(id)

    if self.__stateFile and os.path.isfile(self.__stateFile):
      # Binary mode is byte-identical to text mode on POSIX and is what
      # pickle expects.
      stateFile = open(self.__stateFile, 'rb')
      try:
        info = cPickle.load(stateFile)
      except EOFError:
        pass
      finally:
        stateFile.close()

    return info

  def write(self, id, info):
    """Pickle ``info`` into the state file for ``id``.

    When that file does not exist yet, every other state file in the store
    is removed first (via clear), so the store holds one state file.
    """
    self.__set_state_file(id)
    if not os.path.exists(self.__stateFile):
      self.clear(id)

    stateFile = open(self.__stateFile, 'wb')
    try:
      cPickle.dump(info, stateFile)
    finally:
      stateFile.close()

  def clear(self, id=None):
    """Remove the selected state file, or all state files if it is missing."""
    self.__set_state_file(id)
    if self.__stateFile and os.path.exists(self.__stateFile):
      os.remove(self.__stateFile)
    else:
      for item in self.__state_file_names():
        # listdir yields bare names; they must be resolved against the
        # store, not against the current working directory.
        os.remove(os.path.join(self.__store, item))
        
class hodRunner:
  """Runs one HOD client operation (allocate, deallocate, list, info, help)
  or a script against a freshly allocated cluster.

  The outcome of the last operation is kept in an internal return code that
  operation() and script() return. Codes set in this class: 1 (state file
  not accessible / unexpected failure while saving state), 2 (invalid
  operation), 3 (invalid arguments), 4 (account verification failed),
  12 (cluster already allocated), 15 (directory not tied to a cluster), or
  whatever the cluster object / script returned.
  """

  def __init__(self, cfg, log=None, cluster=None):
    """Set up logging, the per-user state store and the cluster handle.

    cfg     -- configuration, indexed by section name then option name.
    log     -- optional log object; lets tests pass in a stub. When omitted
               a hodLog based logger is created.
    cluster -- optional cluster object; lets tests pass in a stub. When
               omitted a hadoopCluster is created.
    """
    self.__hodhelp = hodHelp()
    self.__ops = self.__hodhelp.ops
    self.__cfg = cfg
    self.__npd = self.__cfg['nodepooldesc']
    self.__opCode = 0
    self.__user = getpass.getuser()
    self.__registry = None
    self.__baseLogger = None
    # Allowing to pass in log object to help testing - a stub can be passed in
    if log is None:
      self.__setup_logger()
    else:
      self.__log = log

    self.__userState = hodState(self.__cfg['hod']['user_state'])

    self.__clusterState = None
    self.__clusterStateInfo = { 'env' : None, 'hdfs' : None, 'mapred' : None }

    # Allowing to pass in cluster object to help testing - a stub can be
    # passed in
    if cluster is None:
      self.__cluster = hadoopCluster(self.__cfg, self.__log)
    else:
      self.__cluster = cluster

  def __setup_logger(self):
    """Create the base logger plus a per-user logger, with optional stream
    and syslog handlers as configured in the 'hod' section."""
    self.__baseLogger = hodlib.Common.logger.hodLog('hod')
    self.__log = self.__baseLogger.add_logger(self.__user )

    if self.__cfg['hod']['stream']:
      self.__baseLogger.add_stream(level=self.__cfg['hod']['debug'],
                            addToLoggerNames=(self.__user ,))

    if self.__cfg['hod'].has_key('syslog-address'):
      self.__baseLogger.add_syslog(self.__cfg['hod']['syslog-address'],
                                   level=self.__cfg['hod']['debug'],
                                   addToLoggerNames=(self.__user ,))

  def get_logger(self):
    """Return the log object used by this runner."""
    return self.__log

  def __setup_cluster_logger(self, directory):
    """Add a log file in ``directory`` to the user logger."""
    if self.__baseLogger is None:
      # A log object was injected, so there is no base logger to extend.
      return
    self.__baseLogger.add_file(logDirectory=directory, level=4,
                          backupCount=self.__cfg['hod']['log-rollover-count'],
                          addToLoggerNames=(self.__user ,))

  def __setup_cluster_state(self, directory):
    """Open the state store that lives in the given cluster directory."""
    self.__clusterState = hodState(directory)

  def __norm_cluster_dir(self, directory):
    """Return ``directory`` as an absolute, normalized path.

    '~' is expanded and relative paths are resolved against the configured
    hod 'original-dir'.
    """
    directory = os.path.expanduser(directory)
    if not os.path.isabs(directory):
      directory = os.path.join(self.__cfg['hod']['original-dir'], directory)
    return os.path.abspath(directory)

  def __setup_service_registry(self):
    """Start the service registry and publish its XML-RPC address in the
    'hod' and 'ringmaster' configuration sections."""
    cfg = self.__cfg['hod'].copy()
    cfg['debug'] = 0
    self.__registry = svcrgy(cfg, self.__log)
    self.__registry.start()
    self.__log.debug(self.__registry.getXMLRPCAddr())
    self.__cfg['hod']['xrs-address'] = self.__registry.getXMLRPCAddr()
    self.__cfg['ringmaster']['svcrgy-addr'] = self.__cfg['hod']['xrs-address']

  def __set_cluster_state_info(self, env, hdfs, mapred, ring, jobid, min, max):
    """Fill in the cluster state record that gets pickled on allocation.

    ``hdfs`` and ``mapred`` are stored with an "http://" prefix.
    """
    self.__clusterStateInfo['env'] = env
    self.__clusterStateInfo['hdfs'] = "http://%s" % hdfs
    self.__clusterStateInfo['mapred'] = "http://%s" % mapred
    self.__clusterStateInfo['ring'] = ring
    self.__clusterStateInfo['jobid'] = jobid
    self.__clusterStateInfo['min'] = min
    self.__clusterStateInfo['max'] = max

  def __set_user_state_info(self, info):
    """Merge ``info`` (cluster directory -> cluster id) into the user state."""
    userState = self.__userState.read(CLUSTER_DATA_FILE)
    for key in info.keys():
      userState[key] = info[key]

    self.__userState.write(CLUSTER_DATA_FILE, userState)

  def __remove_cluster(self, clusterDir):
    """Drop ``clusterDir`` from the user state, if it is listed there."""
    clusterInfo = self.__userState.read(CLUSTER_DATA_FILE)
    if clusterDir in clusterInfo:
      del(clusterInfo[clusterDir])
      self.__userState.write(CLUSTER_DATA_FILE, clusterInfo)

  def __cleanup(self):
    """Stop the service registry if one was started."""
    if self.__registry:
      self.__registry.stop()

  def __check_operation(self, operation):
    """Split the operation string into words and validate the first one.

    On an unknown (or empty) operation the help text is printed and the
    return code is set to 2. The word list is returned either way.
    """
    opList = operation.split()

    # An empty operation string has no first word; treat it as invalid
    # instead of failing with an IndexError.
    if not opList or opList[0] not in self.__ops:
      self.__log.critical("Invalid hod operation specified: %s" % operation)
      self._op_help(None)
      self.__opCode = 2

    return opList

  def __adjustMasterFailureCountConfig(self, nodeCount):
    """Bound ringmaster.max-master-failures by a function of the node count."""
    # This method adjusts the ringmaster.max-master-failures variable
    # to a value that is bounded by the a function of the number of
    # nodes.
    maxFailures = self.__cfg['ringmaster']['max-master-failures']
    # Count number of masters required - depends on which services
    # are external
    masters = 0
    if not self.__cfg['gridservice-hdfs']['external']:
      masters += 1
    if not self.__cfg['gridservice-mapred']['external']:
      masters += 1
    # So, if there are n nodes and m masters, we look atleast for
    # all masters to come up. Therefore, atleast m nodes should be
    # good, which means a maximum of n-m master nodes can fail.
    maxFailedNodes = nodeCount - masters
    # The configured max number of failures is now bounded by this
    # number.
    self.__cfg['ringmaster']['max-master-failures'] = min(maxFailures,
                                                          maxFailedNodes)

  def _op_allocate(self, args):
    """Allocate a cluster: args is ('allocate', <clusterdir>, <nodecount>).

    The nodecount must be a single integer >= 3. The result is left in the
    return code (0 on success).
    """
    operation = "allocate"
    if len(args) != 3:
      print(self.__hodhelp.help(operation))
      self.__log.critical("%s operation requires two arguments. "  % operation
                        + "A cluster directory and a nodecount.")
      self.__opCode = 3
      return

    nodes = args[2]
    clusterDir = self.__norm_cluster_dir(args[1])
    errorMsgs = []

    if not os.path.exists(clusterDir):
      try:
        os.makedirs(clusterDir)
      except OSError as err:
        errorMsgs.append("Could not create cluster directory. %s"
                          % (str(err)))
    elif not os.path.isdir(clusterDir):
      errorMsgs.append(
                  "Invalid cluster directory (--hod.clusterdir or -d) : " +
                       clusterDir + " : Not a directory")

    # Parse the nodecount once, up front. Anything that is not a plain
    # integer is reported as an argument error rather than escaping as an
    # uncaught ValueError.
    try:
      nodeCount = int(nodes)
    except ValueError:
      print(self.__hodhelp.help(operation))
      errorMsgs.append(
          "%s operation requires a pos_int value for n(nodecount)." %
          operation)
    else:
      if nodeCount < 3:
        errorMsgs.append("Invalid nodecount (--hod.nodecount or -n) : " +
                         "Must be >= 3. Given nodes: %s" % nodes)

    if errorMsgs:
      for msg in errorMsgs:
        self.__log.critical(msg)
      self.__opCode = 3
      return

    if not self.__userState.checkStateFile(CLUSTER_DATA_FILE,
                                            (os.R_OK, os.W_OK)):
      self.__log.critical(INVALID_STATE_FILE_MSGS[2] %
                       self.__userState.get_state_file())
      self.__opCode = 1
      return

    clusterList = self.__userState.read(CLUSTER_DATA_FILE)
    if clusterDir in clusterList.keys():
      self.__setup_cluster_state(clusterDir)
      clusterInfo = self.__clusterState.read()
      # Check if the job is not running. Only then can we safely
      # allocate another cluster. Otherwise the user would need
      # to deallocate and free up resources himself.
      if ('jobid' in clusterInfo and
          self.__cluster.is_cluster_deallocated(clusterInfo['jobid'])):
        self.__log.warn("Found a dead cluster at cluster directory '%s'. "
                        "Deallocating it to allocate a new one."
                        % (clusterDir))
        self.__remove_cluster(clusterDir)
        self.__clusterState.clear()
      else:
        self.__log.critical("Found a previously allocated cluster at "
                            "cluster directory '%s'. HOD cannot determine "
                            "if this cluster can be automatically "
                            "deallocated. Deallocate the cluster if it is "
                            "unused." % (clusterDir))
        self.__opCode = 12
        return

    self.__setup_cluster_logger(clusterDir)

    (status, message) = self.__cluster.is_valid_account()
    # Compare by value: identity comparison against 0 is unreliable.
    if status != 0:
      if message:
        for line in message:
          self.__log.critical("verify-account output: %s" % line)
      self.__log.critical("Cluster cannot be allocated because account verification failed. "
                            + "verify-account returned exit code: %s." % status)
      self.__opCode = 4
      return
    self.__log.debug("verify-account returned zero exit code.")
    if message:
      self.__log.debug("verify-account output: %s" % message)

    self.__setup_cluster_state(clusterDir)
    clusterInfo = self.__clusterState.read()
    self.__opCode = self.__cluster.check_cluster(clusterInfo)
    if self.__opCode == 0 or self.__opCode == 15:
      self.__allocate_cluster(clusterDir, nodeCount)
    elif self.__opCode == 12:
      self.__log.critical("Cluster %s already allocated." % clusterDir)
    elif self.__opCode == 10:
      self.__log.critical("dead\t%s\t%s" % (clusterInfo['jobid'],
                                            clusterDir))
    elif self.__opCode == 13:
      self.__log.warn("hdfs dead\t%s\t%s" % (clusterInfo['jobid'],
                                             clusterDir))
    elif self.__opCode == 14:
      self.__log.warn("mapred dead\t%s\t%s" % (clusterInfo['jobid'],
                                               clusterDir))

    if self.__opCode > 0 and self.__opCode != 15:
      self.__log.critical("Cannot allocate cluster %s" % clusterDir)

  def __allocate_cluster(self, clusterDir, nodeCount):
    """Start the service registry, allocate ``nodeCount`` nodes and persist
    the cluster and user state; leaves the allocate status in the return
    code. Raises HodInterruptException if interrupted before or during the
    allocation."""
    self.__setup_service_registry()
    if hodInterrupt.isSet():
      self.__cleanup()
      raise HodInterruptException()
    self.__log.debug("Service Registry started.")
    self.__adjustMasterFailureCountConfig(nodeCount)

    try:
      # A single nodecount is used as both the minimum and the maximum.
      allocateStatus = self.__cluster.allocate(clusterDir, nodeCount,
                                               nodeCount)
    except HodInterruptException as h:
      self.__cleanup()
      raise h
    # Allocation has gone through.
    # Don't care about interrupts any more
    try:
      if allocateStatus == 0:
        self.__set_cluster_state_info(os.environ,
                                      self.__cluster.hdfsInfo,
                                      self.__cluster.mapredInfo,
                                      self.__cluster.ringmasterXRS,
                                      self.__cluster.jobId,
                                      nodeCount, nodeCount)
        self.__setup_cluster_state(clusterDir)
        self.__clusterState.write(self.__cluster.jobId,
                                  self.__clusterStateInfo)
        #  Do we need to check for interrupts here ??

        self.__set_user_state_info(
          { clusterDir : self.__cluster.jobId, } )
      self.__opCode = allocateStatus
    except Exception as e:
      # Some unknown problem: release what was allocated before failing.
      self.__cleanup()
      self.__cluster.deallocate(clusterDir, self.__clusterStateInfo)
      self.__opCode = 1
      raise Exception(e)

  def _is_cluster_allocated(self, clusterDir):
    """Return True if ``clusterDir`` is a directory holding cluster state."""
    if os.path.isdir(clusterDir):
      self.__setup_cluster_state(clusterDir)
      clusterInfo = self.__clusterState.read()
      if clusterInfo != {}:
        return True
    return False

  def _op_deallocate(self, args):
    """Deallocate a cluster: args is ('deallocate', <clusterdir>)."""
    operation = "deallocate"
    if len(args) != 2:
      print(self.__hodhelp.help(operation))
      self.__log.critical("%s operation requires one argument. "  % operation
                        + "A cluster path.")
      self.__opCode = 3
      return

    clusterDir = self.__norm_cluster_dir(args[1])
    clusterInfo = {}
    if os.path.isdir(clusterDir):
      self.__setup_cluster_state(clusterDir)
      clusterInfo = self.__clusterState.read()
    if clusterInfo == {}:
      # Not a directory, or a directory without readable cluster state.
      self.__handle_invalid_cluster_directory(clusterDir, cleanUp=True)
      return

    self.__opCode = self.__cluster.deallocate(clusterDir, clusterInfo)
    # irrespective of whether deallocate failed or not
    # remove the cluster state.
    self.__clusterState.clear()
    if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.W_OK,)):
      self.__log.critical(INVALID_STATE_FILE_MSGS[3] %
                         self.__userState.get_state_file())
      self.__opCode = 1
      return
    self.__remove_cluster(clusterDir)

  def _op_list(self, args):
    """Log one tab-separated line (status, cluster id, directory) for every
    cluster recorded in the user state. ``args`` is not used."""
    statusLabels = { 12 : "alive", 10 : "dead",
                     13 : "hdfs dead", 14 : "mapred dead" }
    clusterList = self.__userState.read(CLUSTER_DATA_FILE)
    for path in clusterList.keys():
      clusterInfo = {}
      if os.path.isdir(path):
        self.__setup_cluster_state(path)
        clusterInfo = self.__clusterState.read()
      if clusterInfo == {}:
        # directory is gone, or something is wrong with its state.
        self.__log.info("cluster state unknown\t%s\t%s"
                        % (clusterList[path], path))
        continue
      clusterStatus = self.__cluster.check_cluster(clusterInfo)
      if clusterStatus in statusLabels:
        self.__log.info("%s\t%s\t%s" % (statusLabels[clusterStatus],
                                        clusterList[path], path))

  def _op_info(self, args):
    """Log details of one cluster: args is ('info', <clusterdir>).

    The return code is set to the cluster status unless the cluster is
    alive (status 12).
    """
    operation = 'info'
    if len(args) != 2:
      print(self.__hodhelp.help(operation))
      self.__log.critical("%s operation requires one argument. "  % operation
                        + "A cluster path.")
      self.__opCode = 3
      return

    clusterDir = self.__norm_cluster_dir(args[1])
    clusterInfo = {}
    if os.path.isdir(clusterDir):
      self.__setup_cluster_state(clusterDir)
      clusterInfo = self.__clusterState.read()
    if clusterInfo == {}:
      # not a directory, or something wrong with the cluster directory.
      self.__handle_invalid_cluster_directory(clusterDir)
      return

    clusterStatus = self.__cluster.check_cluster(clusterInfo)
    if clusterStatus == 12:
      self.__print_cluster_info(clusterInfo)
      self.__log.info("hadoop-site.xml at %s" % clusterDir)
      return

    if clusterStatus == 10:
      self.__log.critical("%s cluster is dead" % clusterDir)
    elif clusterStatus == 13:
      self.__log.warn("%s cluster hdfs is dead" % clusterDir)
    elif clusterStatus == 14:
      self.__log.warn("%s cluster mapred is dead" % clusterDir)

    if clusterStatus == 15:
      self.__log.critical("Cluster %s not allocated." % clusterDir)
    else:
      self.__print_cluster_info(clusterInfo)
      self.__log.info("hadoop-site.xml at %s" % clusterDir)

    self.__opCode = clusterStatus

  def __handle_invalid_cluster_directory(self, clusterDir, cleanUp=False):
    """Report a cluster directory that holds no usable cluster state.

    If the directory is still listed in the user state the return code is
    set to 3 and, with ``cleanUp``, the recorded job is deleted and the
    entry removed. Otherwise the reason is logged and the code is 15.
    """
    if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.R_OK,)):
      self.__log.critical(INVALID_STATE_FILE_MSGS[0] %
                           self.__userState.get_state_file())
      self.__opCode = 1
      return

    clusterList = self.__userState.read(CLUSTER_DATA_FILE)
    if clusterDir in clusterList.keys():
      # previously allocated cluster.
      self.__log.critical("Cannot find information for cluster with id '%s' "
                          "in previously allocated cluster directory '%s'."
                          % (clusterList[clusterDir], clusterDir))
      if cleanUp:
        self.__cluster.delete_job(clusterList[clusterDir])
        self.__log.critical("Freeing resources allocated to the cluster.")
        if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.W_OK,)):
          self.__log.critical(INVALID_STATE_FILE_MSGS[1] %
                              self.__userState.get_state_file())
          self.__opCode = 1
          return
        self.__remove_cluster(clusterDir)
      self.__opCode = 3
      return

    if not os.path.exists(clusterDir):
      reason = " : No such directory"
    elif not os.path.isdir(clusterDir):
      reason = " : Not a directory"
    else:
      reason = " : Not tied to any allocated cluster."
    self.__log.critical(
              "Invalid hod.clusterdir(--hod.clusterdir or -d). " +
              clusterDir + reason)
    self.__opCode = 15

  def __print_cluster_info(self, clusterInfo):
    """Log the cluster id, node count and UI addresses found in
    ``clusterInfo``; at debug level 4 also the saved environment."""
    # Fixed order so the output does not depend on dict ordering.
    labels = (
               ('jobid', 'Cluster Id'), ('min', 'Nodecount'),
               ('hdfs', 'HDFS UI at'), ('mapred', 'Mapred UI at'),
             )
    for key, label in labels:
      if key in clusterInfo:
        self.__log.info("%s %s" % (label, clusterInfo[key]))

    if 'ring' in clusterInfo:
      self.__log.debug("%s\t%s" % ('Ringmaster at ', clusterInfo['ring']))

    if self.__cfg['hod']['debug'] == 4:
      for var in clusterInfo['env'].keys():
        self.__log.debug("%s = %s" % (var, clusterInfo['env'][var]))

  def _op_help(self, arg):
    """Print help for the operation named in arg[1], or for every
    operation when ``arg`` is None or does not have exactly two items."""
    if arg is None or len(arg) != 2:
      print("hod commands:\n")
      for op in self.__ops:
        print(self.__hodhelp.help(op))
    elif arg[1] not in self.__ops:
      print(self.__hodhelp.help('help'))
      self.__log.critical("Help requested for invalid operation : %s"%arg[1])
      self.__opCode = 3
    else:
      print(self.__hodhelp.help(arg[1]))

  def operation(self):
    """Run the operation configured in hod.operation; return the code."""
    operation = self.__cfg['hod']['operation']
    try:
      opList = self.__check_operation(operation)
      if self.__opCode == 0:
        if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.R_OK,)):
          self.__log.critical(INVALID_STATE_FILE_MSGS[0] %
                        self.__userState.get_state_file())
          self.__opCode = 1
          return self.__opCode
        # Dispatch to the matching _op_<name> method.
        getattr(self, "_op_%s" % opList[0])(opList)
    except HodInterruptException as h:
      self.__log.critical("op: %s failed because of a process interrupt."
                                                                % operation)
      self.__opCode = HOD_INTERRUPTED_CODE
    except:
      # NOTE(review): the return code is left as the handler found it, so a
      # failure that did not set a code is still reported as 0 - confirm
      # whether callers rely on that.
      self.__log.critical("op: %s failed: %s" % (operation,
                          get_exception_error_string()))
      self.__log.debug(get_exception_string())

    self.__cleanup()

    self.__log.debug("return code: %s" % self.__opCode)

    return self.__opCode

  def script(self):
    """Allocate a cluster, run the configured script against it and
    deallocate the cluster again; return the resulting code.

    Exits the process with status 3 when the script, cluster directory or
    nodecount configuration is invalid.
    """
    errorMsgs = []
    scriptRet = 0 # return from the script, if run

    script = self.__cfg['hod']['script']
    nodes = self.__cfg['hod']['nodecount']
    clusterDir = self.__cfg['hod']['clusterdir']

    if not os.path.exists(script):
      errorMsgs.append("Invalid script file (--hod.script or -s) : " +
                       script + " : No such file")
    elif not os.path.isfile(script):
      errorMsgs.append("Invalid script file (--hod.script or -s) : " +
                       script + " : Not a file.")
    elif not os.access(script, os.X_OK):
      errorMsgs.append("Invalid script file (--hod.script or -s) : " +
                       script + " : Not an executable.")

    if not os.path.exists(clusterDir):
      try:
        os.makedirs(clusterDir)
      except OSError as err:
        errorMsgs.append("Could not create cluster directory. %s" % (str(err)))
    elif not os.path.isdir(clusterDir):
      errorMsgs.append(
                  "Invalid cluster directory (--hod.clusterdir or -d) : " +
                       clusterDir + " : Not a directory")

    if int(nodes) < 3 :
      errorMsgs.append("Invalid nodecount (--hod.nodecount or -n) : " +
                       "Must be >= 3. Given nodes: %s" % nodes)

    if errorMsgs:
      for msg in errorMsgs:
        self.__log.critical(msg)
      self.handle_script_exit_code(scriptRet, clusterDir)
      sys.exit(3)

    try:
      self._op_allocate(('allocate', clusterDir, str(nodes)))
      if self.__opCode == 0:
        if self.__cfg['hod'].has_key('script-wait-time'):
          time.sleep(self.__cfg['hod']['script-wait-time'])
          self.__log.debug('Slept for %d time. Now going to run the script' % self.__cfg['hod']['script-wait-time'])
        if hodInterrupt.isSet():
          self.__log.debug('Hod interrupted - not executing script')
        else:
          scriptRunner = hadoopScript(clusterDir,
                                  self.__cfg['hod']['original-dir'])
          self.__opCode = scriptRunner.run(script)
          scriptRet = self.__opCode
          self.__log.info("Exit code from running the script: %d" % self.__opCode)
      else:
        self.__log.critical("Error %d in allocating the cluster. Cannot run the script." % self.__opCode)

      if hodInterrupt.isSet():
        # Got interrupt while executing script. Unsetting it for deallocating
        hodInterrupt.setFlag(False)
      if self._is_cluster_allocated(clusterDir):
        self._op_deallocate(('deallocate', clusterDir))
    except HodInterruptException as h:
      self.__log.critical("Script failed because of a process interrupt.")
      self.__opCode = HOD_INTERRUPTED_CODE
    except:
      self.__log.critical("script: %s failed: %s" % (script,
                          get_exception_error_string()))
      self.__log.debug(get_exception_string())

    self.__cleanup()
    self.handle_script_exit_code(scriptRet, clusterDir)

    return self.__opCode

  def handle_script_exit_code(self, scriptRet, clusterDir):
    """Record a failed script's exit code in <clusterDir>/script.exitcode.

    A non-zero ``scriptRet`` is written to the file and also becomes the
    return code; a zero one removes any stale file. Nothing happens when
    ``clusterDir`` does not exist.
    """
    # We want to give importance to a failed script's exit code, and write out exit code to a file separately
    # so users can easily get it if required. This way they can differentiate between the script's exit code
    # and hod's exit code.
    if not os.path.exists(clusterDir):
      return

    exit_code_file_name = os.path.join(clusterDir, 'script.exitcode')
    if scriptRet != 0:
      exit_code_file = open(exit_code_file_name, 'w')
      try:
        exit_code_file.write(str(scriptRet) + "\n")
      finally:
        exit_code_file.close()
      self.__opCode = scriptRet
    elif os.path.exists(exit_code_file_name):
      # ensure script exit code file is not there:
      os.remove(exit_code_file_name)
class hodHelp:
  """Usage and description texts for the hod client operations.

  ``ops`` lists the known operation names; ``usage_strings`` and
  ``description_strings`` are keyed by those names.
  """

  def __init__(self):
    self.ops = ['allocate', 'deallocate', 'info', 'list','script',  'help']

    self.usage_strings = {
        'allocate'   : 'hod allocate -d <clusterdir> -n <nodecount> [OPTIONS]',
        'deallocate' : 'hod deallocate -d <clusterdir> [OPTIONS]',
        'list'       : 'hod list [OPTIONS]',
        'info'       : 'hod info -d <clusterdir> [OPTIONS]',
        'script'     :
              'hod script -d <clusterdir> -n <nodecount> -s <script> [OPTIONS]',
        'help'       : 'hod help <OPERATION>',
        }

    # Continuation lines are indented so that they line up under the text
    # that follows the "Description : " prefix added by help().
    self.description_strings = {
       'allocate' : "Allocates a cluster of n nodes using the specified \n" +
      "              cluster directory to store cluster state \n" +
      "              information. The Hadoop site XML is also stored \n" +
      "              in this location.\n",

       'deallocate' : "Deallocates a cluster using the specified \n" +
      "             cluster directory.  This operation is also \n" +
      "             required to clean up a dead cluster.\n",

       'list' : "List all clusters currently allocated by a user, \n" +
      "              along with limited status information and the \n" +
      "              cluster ID.\n",

       'info' : "Provide detailed information on an allocated cluster.\n",

       'script' : "Allocates a cluster of n nodes with the given \n" +
           "              cluster directory, runs the specified script \n" +
           "              using the allocated cluster, and then \n" +
           "              deallocates the cluster.\n",

       'help' : "Print help for the operation and exit.\n" +
                "Available operations : %s.\n" % self.ops,
       }

  def usage(self, op):
    """Return the usage line for ``op`` plus a pointer to its full help.

    Raises KeyError if ``op`` is not a known operation.
    """
    return ("Usage       : " + self.usage_strings[op] + "\n" +
            "For full description: hod help " + op + ".\n")

  def help(self, op=None):
    """Return the help text for ``op``, or a general overview when omitted.

    Raises KeyError if ``op`` is given but is not a known operation.
    """
    if op is None:
      return ("hod <operation> [ARGS] [OPTIONS]\n" +
              "Available operations : %s\n" % self.ops +
              "For help on a particular operation : hod help <operation>.\n" +
              "For all options : hod help options.")
    else:
      return ("Usage       : " + self.usage_strings[op] + "\n" +
              "Description : " + self.description_strings[op] +
              "For all options : hod help options.\n")