"""
This file contains the base MEOW conductor definition. This should be
inherited from for all conductor instances.

Author(s): David Marchant
"""

import shutil
import subprocess
import os

from datetime import datetime
from threading import Event, Thread
from time import sleep
from typing import Any, Tuple, Dict, Union

from meow_base.core.meow import valid_job
from meow_base.core.vars import VALID_CONDUCTOR_NAME_CHARS, VALID_CHANNELS, \
    JOB_STATUS, JOB_START_TIME, META_FILE, STATUS_RUNNING, STATUS_DONE , \
    BACKUP_JOB_ERROR_FILE, JOB_END_TIME, STATUS_FAILED, JOB_ERROR, \
    get_drt_imp_msg
from meow_base.functionality.file_io import write_file, \
    threadsafe_read_status, threadsafe_update_status
from meow_base.functionality.validation import check_implementation, \
    valid_string, valid_existing_dir_path, valid_natural, valid_dir_path
from meow_base.functionality.naming import generate_conductor_id


class BaseConductor:
    # An identifier for a conductor within the runner. Can be manually set in
    # the constructor, or autogenerated if no name provided.
    name:str
    # A channel for sending messages to the runner job queue. Note that this
    # will be overridden by a MeowRunner, if a conductor instance is passed to
    # it, and so does not need to be initialised within the conductor itself,
    # unless the conductor is running independently of a runner.
    to_runner_job: VALID_CHANNELS
    # Directory where queued jobs are initially written to. Note that this
    # will be overridden by a MeowRunner, if a conductor instance is passed to
    # it, and so does not need to be initialised within the conductor itself.
    job_queue_dir:str
    # Directory where completed jobs are finally written to. Note that this
    # will be overridden by a MeowRunner, if a conductor instance is passed to
    # it, and so does not need to be initialised within the conductor itself.
    job_output_dir:str
    # A count, for how long a conductor will wait if told that there are no
    # jobs in the runner, before polling again. Default is 5 seconds.
    pause_time: int

    def __init__(self, name:str="", pause_time:int=5)->None:
        """BaseConductor Constructor. This will check that any class
        inheriting from it implements its validation functions.

        If 'name' is empty, one is generated with generate_conductor_id.
        Both 'name' and 'pause_time' are validated before being stored."""
        check_implementation(type(self).valid_execute_criteria, BaseConductor)
        if not name:
            name = generate_conductor_id()
        self._is_valid_name(name)
        self.name = name
        self._is_valid_pause_time(pause_time)
        self.pause_time = pause_time

    def __new__(cls, *args, **kwargs):
        """A check that this base class is not instantiated itself, only
        inherited from. Raises a TypeError if BaseConductor is instantiated
        directly."""
        if cls is BaseConductor:
            msg = get_drt_imp_msg(BaseConductor)
            raise TypeError(msg)
        return object.__new__(cls)

    def _is_valid_name(self, name:str)->None:
        """Validation check for 'name' variable from main constructor. Is
        automatically called during initialisation. This does not need to be
        overridden by child classes."""
        valid_string(name, VALID_CONDUCTOR_NAME_CHARS)

    def _is_valid_pause_time(self, pause_time:int)->None:
        """Validation check for 'pause_time' variable from main constructor.
        Is automatically called during initialisation. This does not need to
        be overridden by child classes."""
        # The hint names this class so that validation errors point at the
        # conductor rather than at a handler.
        valid_natural(pause_time, hint="BaseConductor.pause_time")

    def prompt_runner_for_job(self)->Union[Dict[str,Any],Any]:
        """Function to ask the runner for a job. Sends a request down the
        'to_runner_job' channel, then waits up to 'pause_time' seconds for a
        reply. Returns whatever was received, or None if nothing arrived
        within that time."""
        self.to_runner_job.send(1)

        if self.to_runner_job.poll(self.pause_time):
            return self.to_runner_job.recv()
        return None

    def start(self)->None:
        """Function to start the conductor as an ongoing thread, as defined by
        the main_loop function. Together, these will execute any code in a
        implemented conductors execute function sequentially, but concurrently
        to any other conductors running or other runner operations. This is
        intended as a naive multiprocessing implementation, and any more in
        depth parallelisation of execution must be implemented by a user by
        overriding this function, and the stop function."""
        self._stop_event = Event()
        self._handle_thread = Thread(
            target=self.main_loop,
            args=(self._stop_event,),
            daemon=True,
            name="conductor_thread"
        )
        self._handle_thread.start()

    def stop(self)->None:
        """Function to stop the conductor as an ongoing thread. May be
        overridden by any child class. This function should also be
        overridden if the start function has been."""
        self._stop_event.set()
        self._handle_thread.join()

    def main_loop(self, stop_event)->None:
        """Function defining an ongoing thread, as started by the start
        function and stopped by the stop function. Repeatedly prompts the
        runner for a job and passes any valid job directory received to the
        execute function, until 'stop_event' is set."""

        while not stop_event.is_set():
            reply = self.prompt_runner_for_job()

            # If we have received 'None' then we have already timed out so
            # skip this loop and start again
            if reply is None:
                continue

            try:
                valid_existing_dir_path(reply)
            except Exception:
                # Were not given a job dir, so sleep before trying again.
                # Skip the rest of this iteration so that the invalid reply
                # is never passed on to execute.
                sleep(self.pause_time)
                continue

            try:
                self.execute(reply)
            except Exception:
                # TODO some error reporting here
                # Deliberately best-effort, so that one failing job does not
                # end the conductor thread.
                pass

    def valid_execute_criteria(self, job:Dict[str,Any])->Tuple[bool,str]:
        """Function to determine given an job definition, if this conductor can
        process it or not. Must be implemented by any child process."""
        pass

    def run_job(self, job_dir:str)->None:
        """Function to actually execute a job. This will read job definitions
        from its meta file, update the meta file and attempt to execute. Some
        unspecific feedback will be given on execution failure, but depending
        on what it is it may be up to the job itself to provide more detailed
        feedback. If you simply wish to alter the conditions under which the
        job is executed, please instead look at the execute function."""
        valid_dir_path(job_dir, must_exist=True)

        # Test our job parameters. Even if its gibberish, we still move to
        # output
        abort = False
        try:
            meta_file = os.path.join(job_dir, META_FILE)
            job = threadsafe_read_status(meta_file)
            valid_job(job)

            # update the status file with running status
            threadsafe_update_status(
                {
                    JOB_STATUS: STATUS_RUNNING,
                    JOB_START_TIME: datetime.now()
                },
                meta_file
            )

        except Exception as e:
            # If something has gone wrong at this stage then its bad, so we
            # need to make our own error file
            error_file = os.path.join(job_dir, BACKUP_JOB_ERROR_FILE)
            write_file(f"Recieved incorrectly setup job.\n\n{e}", error_file)
            abort = True

        # execute the job
        if not abort:
            try:
                result = subprocess.call(
                    os.path.join(job_dir, job["tmp script command"]),
                    cwd="."
                )

                if result == 0:
                    # Update the status file with the finalised status
                    threadsafe_update_status(
                        {
                            JOB_STATUS: STATUS_DONE,
                            JOB_END_TIME: datetime.now()
                        },
                        meta_file
                    )

                else:
                    # Update the status file with the error status. Don't
                    # overwrite any more specific error messages already
                    # created
                    threadsafe_update_status(
                        {
                            JOB_STATUS: STATUS_FAILED,
                            JOB_END_TIME: datetime.now(),
                            JOB_ERROR: "Job execution returned non-zero."
                        },
                        meta_file
                    )

            except Exception as e:
                # Update the status file with the error status. Don't overwrite
                # any more specific error messages already created
                threadsafe_update_status(
                    {
                        JOB_STATUS: STATUS_FAILED,
                        JOB_END_TIME: datetime.now(),
                        JOB_ERROR: f"Job execution failed. {e}"
                    },
                    meta_file
                )

        # Move the contents of the execution directory to the final output
        # directory.
        job_output_dir = \
            os.path.join(self.job_output_dir, os.path.basename(job_dir))
        shutil.move(job_dir, job_output_dir)

    def execute(self, job_dir:str)->None:
        """Function to run job execution. By default this will simply call the
        run_job function, to execute the job locally. However, this function
        may be overridden to execute the job in some other manner, such as on
        another resource. Note that the job itself should be executed using the
        run_job func in order to maintain expected logging etc."""
        self.run_job(job_dir)