# ... (earlier lines of the file are elided in this excerpt)
try:
    from chromite.lib import metrics
except ImportError:
    # Fall back to the mock metrics object when chromite is unavailable.
    metrics = utils.metrics_mock


def _get_pidfile_timeout_secs():
    """@returns How long to wait for autoserv to write pidfile (seconds)."""
    pidfile_timeout_mins = global_config.global_config.get_config_value(
            scheduler_config.CONFIG_SECTION, 'pidfile_timeout_mins', type=int)
    return pidfile_timeout_mins * 60


class PidfileRunMonitor(object):
    """
    Client must call either run() to start a new process or
    attach_to_existing_process().
    """

    class _PidfileException(Exception):
        """
        Raised when there's some unexpected behavior with the pid file, but only
        used internally (never allowed to escape this class).
        """


    def __init__(self):
        self._drone_manager = drone_manager.instance()
        self.lost_process = False
        self._start_time = None
        self.pidfile_id = None
        self._killed = False
        # Last pidfile contents read; starts out as an empty PidfileContents.
        self._state = drone_manager.PidfileContents()


    def _add_nice_command(self, command, nice_level):
        """
        Return command prefixed with 'nice -n <nice_level>'.

        A falsy nice_level (None or 0) returns command unchanged.
        """
        if not nice_level:
            return command
        return ['nice', '-n', str(nice_level)] + command


    def _set_start_time(self):
        """Record the current time as the start of the pidfile wait."""
        self._start_time = time.time()


    def run(self, command, working_directory, num_processes, nice_level=None,
            log_file=None, pidfile_name=None, paired_with_pidfile=None,
            username=None, drone_hostnames_allowed=None):
        """
        Start command through the drone manager and remember its pidfile id.

        Apart from nice_level, which only prefixes the command with
        'nice -n <nice_level>', all arguments are passed through to
        drone_manager.execute_command().
        """
        assert command is not None
        # NOTE(review): same prefix as _add_nice_command(), except that a
        # nice_level of 0 is still applied here while the helper skips it.
        if nice_level is not None:
            command = ['nice', '-n', str(nice_level)] + command
        self._set_start_time()
        self.pidfile_id = self._drone_manager.execute_command(
                command, working_directory, pidfile_name=pidfile_name,
                num_processes=num_processes, log_file=log_file,
                paired_with_pidfile=paired_with_pidfile, username=username,
                drone_hostnames_allowed=drone_hostnames_allowed)


    def attach_to_existing_process(self, execution_path,
                                   pidfile_name=drone_manager.AUTOSERV_PID_FILE,
                                   num_processes=None):
        """
        Monitor a pidfile under execution_path instead of starting a process.

        If num_processes is given, it is declared to the drone manager for
        the resulting pidfile id.
        """
        self._set_start_time()
        self.pidfile_id = self._drone_manager.get_pidfile_id_from(
                execution_path, pidfile_name=pidfile_name)
        if num_processes is not None:
            self._drone_manager.declare_process_count(self.pidfile_id,
                                                      num_processes)


    def kill(self):
        """Kill the monitored process, if the pidfile currently names one."""
        if self.has_process():
            self._drone_manager.kill_process(self.get_process())
            # Remembered so that _handle_no_process() can tell an expected
            # disappearance from a surprising one.
            self._killed = True


    def has_process(self):
        """Refresh the pidfile state; return whether it names a process."""
        self._get_pidfile_info()
        return self._state.process is not None


    def get_process(self):
        """Refresh the pidfile state; return its process (must be set)."""
        self._get_pidfile_info()
        assert self._state.process is not None
        return self._state.process


    def _read_pidfile(self, use_second_read=False):
        """
        Load the pidfile contents from the drone manager into self._state.

        Invalid contents reset self._state to an empty PidfileContents and
        raise _PidfileException.
        """
        assert self.pidfile_id is not None, (
                'You must call run() or attach_to_existing_process()')
        contents = self._drone_manager.get_pidfile_contents(
                self.pidfile_id, use_second_read=use_second_read)
        if contents.is_invalid():
            self._state = drone_manager.PidfileContents()
            raise self._PidfileException(contents)
        self._state = contents


    def _handle_pidfile_error(self, error, message=''):
        """
        Log a pidfile problem and report the process as lost.

        @param error: short description of the problem.
        @param message: optional extra detail (e.g. a formatted traceback).
        """
        # Log the arguments instead of silently dropping them, so the cause
        # of a lost process can be found afterwards.
        logging.warning('%s (process: %s, pidfile: %s)\n%s',
                        error, self._state.process, self.pidfile_id, message)
        self.on_lost_process(self._state.process)


    def _get_pidfile_info_helper(self):
        """
        Refresh self._state from the pidfile.

        May raise _PidfileException (via _read_pidfile); _get_pidfile_info()
        is the wrapper that catches it.
        """
        if self.lost_process:
            return

        self._read_pidfile()

        if self._state.process is None:
            self._handle_no_process()
            return

        if self._state.exit_status is None:
            # double check whether or not autoserv is running
            if self._drone_manager.is_process_running(self._state.process):
                return

            # pid but no running process - maybe process *just* exited
            self._read_pidfile(use_second_read=True)
            if self._state.exit_status is None:
                # autoserv exited without writing an exit code
                # to the pidfile
                self._handle_pidfile_error(
                        'autoserv died without writing exit code')


    def _get_pidfile_info(self):
        """\
        After completion, self._state will contain:
         pid=None, exit_status=None if autoserv has not yet run
         pid!=None, exit_status=None if autoserv is running
         pid!=None, exit_status!=None if autoserv has completed
        """
        try:
            self._get_pidfile_info_helper()
        except self._PidfileException:
            # No "except X, name" form: it is a SyntaxError on Python 3 and
            # the bound exception was never used.
            self._handle_pidfile_error('Pidfile error', traceback.format_exc())


    def _handle_no_process(self):
        """\
        Called when no pidfile is found or no pid is in the pidfile.
        """
        if time.time() - self._start_time > _get_pidfile_timeout_secs():
            # If we aborted the process, and we find that it has exited without
            # writing a pidfile, then it's because we killed it, and thus this
            # isn't a surprising situation.
            if not self._killed:
                metrics.Counter('chromeos/autotest/errors/scheduler/no_pidfile'
                                ).increment()
            else:
                logging.warning("%s didn't exit after SIGTERM", self.pidfile_id)
            self.on_lost_process()


    def on_lost_process(self, process=None):
        """\
        Called when autoserv has exited without writing an exit status,
        or we've timed out waiting for autoserv to write a pid to the
        pidfile. In either case, we just return failure and the caller...
        """
        # NOTE(review): the remainder of this docstring and the whole method
        # body are cut off in the reviewed excerpt. The docstring is closed
        # here only so the file parses -- restore the original body.

