388 lines
16 KiB
Python
388 lines
16 KiB
Python
|
|
"""
|
|
This file contains the defintion for the MeowRunner, the main construct used
|
|
for actually orchestration MEOW analysis. It is intended as a modular system,
|
|
with monitors, handlers, and conductors being swappable at initialisation.
|
|
|
|
Author(s): David Marchant
|
|
"""
|
|
import os
|
|
import sys
|
|
import threading
|
|
|
|
from multiprocessing import Pipe
|
|
from random import randrange
|
|
from typing import Any, Union, Dict, List
|
|
|
|
from core.base_conductor import BaseConductor
|
|
from core.base_handler import BaseHandler
|
|
from core.base_monitor import BaseMonitor
|
|
from core.correctness.vars import DEBUG_WARNING, DEBUG_INFO, EVENT_TYPE, \
|
|
VALID_CHANNELS, META_FILE, DEFAULT_JOB_OUTPUT_DIR, DEFAULT_JOB_QUEUE_DIR, \
|
|
EVENT_PATH
|
|
from core.correctness.validation import check_type, valid_list, valid_dir_path
|
|
from functionality.debug import setup_debugging, print_debug
|
|
from functionality.file_io import make_dir, read_yaml
|
|
from functionality.process_io import wait
|
|
|
|
|
|
class MeowRunner:
|
|
# A collection of all monitors in the runner
|
|
monitors:List[BaseMonitor]
|
|
# A collection of all handlers in the runner
|
|
handlers:List[BaseHandler]
|
|
# A collection of all conductors in the runner
|
|
conductors:List[BaseConductor]
|
|
# A collection of all channels from each monitor
|
|
from_monitors: List[VALID_CHANNELS]
|
|
# A collection of all channels from each handler
|
|
from_handlers: List[VALID_CHANNELS]
|
|
# Directory where queued jobs are initially written to
|
|
job_queue_dir:str
|
|
# Directory where completed jobs are finally written to
|
|
job_output_dir:str
|
|
def __init__(self, monitors:Union[BaseMonitor,List[BaseMonitor]],
|
|
handlers:Union[BaseHandler,List[BaseHandler]],
|
|
conductors:Union[BaseConductor,List[BaseConductor]],
|
|
job_queue_dir:str=DEFAULT_JOB_QUEUE_DIR,
|
|
job_output_dir:str=DEFAULT_JOB_OUTPUT_DIR,
|
|
print:Any=sys.stdout, logging:int=0)->None:
|
|
"""MeowRunner constructor. This connects all provided monitors,
|
|
handlers and conductors according to what events and jobs they produce
|
|
or consume."""
|
|
|
|
self._is_valid_job_queue_dir(job_queue_dir)
|
|
self._is_valid_job_output_dir(job_output_dir)
|
|
|
|
self._is_valid_conductors(conductors)
|
|
# If conductors isn't a list, make it one
|
|
if not type(conductors) == list:
|
|
conductors = [conductors]
|
|
for conductor in conductors:
|
|
conductor.job_output_dir = job_output_dir
|
|
conductor.job_queue_dir = job_queue_dir
|
|
|
|
self.conductors = conductors
|
|
|
|
self._is_valid_handlers(handlers)
|
|
# If handlers isn't a list, make it one
|
|
if not type(handlers) == list:
|
|
handlers = [handlers]
|
|
self.from_handlers = []
|
|
for handler in handlers:
|
|
# Create a channel from the handler back to this runner
|
|
handler_to_runner_reader, handler_to_runner_writer = Pipe()
|
|
handler.to_runner = handler_to_runner_writer
|
|
handler.job_queue_dir = job_queue_dir
|
|
self.from_handlers.append(handler_to_runner_reader)
|
|
self.handlers = handlers
|
|
|
|
self._is_valid_monitors(monitors)
|
|
# If monitors isn't a list, make it one
|
|
if not type(monitors) == list:
|
|
monitors = [monitors]
|
|
self.monitors = monitors
|
|
self.from_monitors = []
|
|
for monitor in self.monitors:
|
|
# Create a channel from the monitor back to this runner
|
|
monitor_to_runner_reader, monitor_to_runner_writer = Pipe()
|
|
monitor.to_runner = monitor_to_runner_writer
|
|
self.from_monitors.append(monitor_to_runner_reader)
|
|
|
|
# Create channel to send stop messages to monitor/handler thread
|
|
self._stop_mon_han_pipe = Pipe()
|
|
self._mon_han_worker = None
|
|
|
|
# Create channel to send stop messages to handler/conductor thread
|
|
self._stop_han_con_pipe = Pipe()
|
|
self._han_con_worker = None
|
|
|
|
# Setup debugging
|
|
self._print_target, self.debug_level = setup_debugging(print, logging)
|
|
|
|
def run_monitor_handler_interaction(self)->None:
|
|
"""Function to be run in its own thread, to handle any inbound messages
|
|
from monitors. These will be events, which should be matched to an
|
|
appropriate handler and handled."""
|
|
all_inputs = self.from_monitors + [self._stop_mon_han_pipe[0]]
|
|
while True:
|
|
ready = wait(all_inputs)
|
|
|
|
# If we get a message from the stop channel, then finish
|
|
if self._stop_mon_han_pipe[0] in ready:
|
|
return
|
|
else:
|
|
for from_monitor in self.from_monitors:
|
|
if from_monitor in ready:
|
|
# Read event from the monitor channel
|
|
message = from_monitor.recv()
|
|
event = message
|
|
|
|
valid_handlers = []
|
|
for handler in self.handlers:
|
|
try:
|
|
valid, _ = handler.valid_handle_criteria(event)
|
|
if valid:
|
|
valid_handlers.append(handler)
|
|
except Exception as e:
|
|
print_debug(
|
|
self._print_target,
|
|
self.debug_level,
|
|
"Could not determine validity of event "
|
|
f"for handler. {e}",
|
|
DEBUG_INFO
|
|
)
|
|
|
|
# If we've only one handler, use that
|
|
if len(valid_handlers) == 1:
|
|
handler = valid_handlers[0]
|
|
self.handle_event(handler, event)
|
|
# If multiple handlers then randomly pick one
|
|
else:
|
|
handler = valid_handlers[
|
|
randrange(len(valid_handlers))
|
|
]
|
|
self.handle_event(handler, event)
|
|
|
|
def run_handler_conductor_interaction(self)->None:
|
|
"""Function to be run in its own thread, to handle any inbound messages
|
|
from handlers. These will be jobs, which should be matched to an
|
|
appropriate conductor and executed."""
|
|
all_inputs = self.from_handlers + [self._stop_han_con_pipe[0]]
|
|
while True:
|
|
ready = wait(all_inputs)
|
|
|
|
# If we get a message from the stop channel, then finish
|
|
if self._stop_han_con_pipe[0] in ready:
|
|
return
|
|
else:
|
|
for from_handler in self.from_handlers:
|
|
if from_handler in ready:
|
|
# Read job directory from the handler channel
|
|
job_dir = from_handler.recv()
|
|
try:
|
|
metafile = os.path.join(job_dir, META_FILE)
|
|
job = read_yaml(metafile)
|
|
except Exception as e:
|
|
print_debug(self._print_target, self.debug_level,
|
|
"Could not load necessary job definitions for "
|
|
f"job at '{job_dir}'. {e}", DEBUG_INFO)
|
|
continue
|
|
|
|
valid_conductors = []
|
|
for conductor in self.conductors:
|
|
try:
|
|
valid, _ = \
|
|
conductor.valid_execute_criteria(job)
|
|
if valid:
|
|
valid_conductors.append(conductor)
|
|
except Exception as e:
|
|
print_debug(
|
|
self._print_target,
|
|
self.debug_level,
|
|
"Could not determine validity of job "
|
|
f"for conductor. {e}",
|
|
DEBUG_INFO
|
|
)
|
|
|
|
# If we've only one conductor, use that
|
|
if len(valid_conductors) == 1:
|
|
conductor = valid_conductors[0]
|
|
self.execute_job(conductor, job_dir)
|
|
# If multiple handlers then randomly pick one
|
|
else:
|
|
conductor = valid_conductors[
|
|
randrange(len(valid_conductors))
|
|
]
|
|
self.execute_job(conductor, job_dir)
|
|
|
|
def handle_event(self, handler:BaseHandler, event:Dict[str,Any])->None:
|
|
"""Function for a given handler to handle a given event, without
|
|
crashing the runner in the event of a problem."""
|
|
print_debug(self._print_target, self.debug_level,
|
|
f"Starting handling for {event[EVENT_TYPE]} event: "
|
|
f"'{event[EVENT_PATH]}'", DEBUG_INFO)
|
|
try:
|
|
handler.handle(event)
|
|
print_debug(self._print_target, self.debug_level,
|
|
f"Completed handling for {event[EVENT_TYPE]} event: "
|
|
f"'{event[EVENT_PATH]}'", DEBUG_INFO)
|
|
except Exception as e:
|
|
print_debug(self._print_target, self.debug_level,
|
|
f"Something went wrong during handling for {event[EVENT_TYPE]}"
|
|
f" event '{event[EVENT_PATH]}'. {e}", DEBUG_INFO)
|
|
|
|
def execute_job(self, conductor:BaseConductor, job_dir:str)->None:
|
|
"""Function for a given conductor to execute a given job, without
|
|
crashing the runner in the event of a problem."""
|
|
job_id = os.path.basename(job_dir)
|
|
print_debug(
|
|
self._print_target,
|
|
self.debug_level,
|
|
f"Starting execution for job: '{job_id}'",
|
|
DEBUG_INFO
|
|
)
|
|
try:
|
|
conductor.execute(job_dir)
|
|
print_debug(
|
|
self._print_target,
|
|
self.debug_level,
|
|
f"Completed execution for job: '{job_id}'",
|
|
DEBUG_INFO
|
|
)
|
|
except Exception as e:
|
|
print_debug(
|
|
self._print_target,
|
|
self.debug_level,
|
|
f"Something went wrong in execution of job '{job_id}'. {e}",
|
|
DEBUG_INFO
|
|
)
|
|
|
|
def start(self)->None:
|
|
"""Function to start the runner by starting all of the constituent
|
|
monitors, handlers and conductors, along with managing interaction
|
|
threads."""
|
|
# Start all monitors
|
|
for monitor in self.monitors:
|
|
monitor.start()
|
|
startable = []
|
|
# Start all handlers, if they need it
|
|
for handler in self.handlers:
|
|
if hasattr(handler, "start") and handler not in startable:
|
|
startable.append()
|
|
# Start all conductors, if they need it
|
|
for conductor in self.conductors:
|
|
if hasattr(conductor, "start") and conductor not in startable:
|
|
startable.append()
|
|
for starting in startable:
|
|
starting.start()
|
|
|
|
# If we've not started the monitor/handler interaction thread yet, then
|
|
# do so
|
|
if self._mon_han_worker is None:
|
|
self._mon_han_worker = threading.Thread(
|
|
target=self.run_monitor_handler_interaction,
|
|
args=[])
|
|
self._mon_han_worker.daemon = True
|
|
self._mon_han_worker.start()
|
|
print_debug(self._print_target, self.debug_level,
|
|
"Starting MeowRunner event handling...", DEBUG_INFO)
|
|
else:
|
|
msg = "Repeated calls to start MeowRunner event handling have " \
|
|
"no effect."
|
|
print_debug(self._print_target, self.debug_level,
|
|
msg, DEBUG_WARNING)
|
|
raise RuntimeWarning(msg)
|
|
|
|
# If we've not started the handler/conductor interaction thread yet,
|
|
# then do so
|
|
if self._han_con_worker is None:
|
|
self._han_con_worker = threading.Thread(
|
|
target=self.run_handler_conductor_interaction,
|
|
args=[])
|
|
self._han_con_worker.daemon = True
|
|
self._han_con_worker.start()
|
|
print_debug(self._print_target, self.debug_level,
|
|
"Starting MeowRunner job conducting...", DEBUG_INFO)
|
|
else:
|
|
msg = "Repeated calls to start MeowRunner job conducting have " \
|
|
"no effect."
|
|
print_debug(self._print_target, self.debug_level,
|
|
msg, DEBUG_WARNING)
|
|
raise RuntimeWarning(msg)
|
|
|
|
def stop(self)->None:
|
|
"""Function to stop the runner by stopping all of the constituent
|
|
monitors, handlers and conductors, along with managing interaction
|
|
threads."""
|
|
# Stop all the monitors
|
|
for monitor in self.monitors:
|
|
monitor.stop()
|
|
|
|
stopable = []
|
|
# Stop all handlers, if they need it
|
|
for handler in self.handlers:
|
|
if hasattr(handler, "stop") and handler not in stopable:
|
|
stopable.append()
|
|
# Stop all conductors, if they need it
|
|
for conductor in self.conductors:
|
|
if hasattr(conductor, "stop") and conductor not in stopable:
|
|
stopable.append()
|
|
for stopping in stopable:
|
|
stopping.stop()
|
|
|
|
# If we've started the monitor/handler interaction thread, then stop it
|
|
if self._mon_han_worker is None:
|
|
msg = "Cannot stop event handling thread that is not started."
|
|
print_debug(self._print_target, self.debug_level,
|
|
msg, DEBUG_WARNING)
|
|
raise RuntimeWarning(msg)
|
|
else:
|
|
self._stop_mon_han_pipe[1].send(1)
|
|
self._mon_han_worker.join()
|
|
print_debug(self._print_target, self.debug_level,
|
|
"Event handler thread stopped", DEBUG_INFO)
|
|
|
|
# If we've started the handler/conductor interaction thread, then stop
|
|
# it
|
|
if self._han_con_worker is None:
|
|
msg = "Cannot stop job conducting thread that is not started."
|
|
print_debug(self._print_target, self.debug_level,
|
|
msg, DEBUG_WARNING)
|
|
raise RuntimeWarning(msg)
|
|
else:
|
|
self._stop_han_con_pipe[1].send(1)
|
|
self._han_con_worker.join()
|
|
print_debug(self._print_target, self.debug_level,
|
|
"Job conductor thread stopped", DEBUG_INFO)
|
|
|
|
def _is_valid_monitors(self,
|
|
monitors:Union[BaseMonitor,List[BaseMonitor]])->None:
|
|
"""Validation check for 'monitors' variable from main constructor."""
|
|
check_type(
|
|
monitors,
|
|
BaseMonitor,
|
|
alt_types=[List],
|
|
hint="MeowRunner.monitors"
|
|
)
|
|
if type(monitors) == list:
|
|
valid_list(monitors, BaseMonitor, min_length=1)
|
|
|
|
def _is_valid_handlers(self,
|
|
handlers:Union[BaseHandler,List[BaseHandler]])->None:
|
|
"""Validation check for 'handlers' variable from main constructor."""
|
|
check_type(
|
|
handlers,
|
|
BaseHandler,
|
|
alt_types=[List],
|
|
hint="MeowRunner.handlers"
|
|
)
|
|
if type(handlers) == list:
|
|
valid_list(handlers, BaseHandler, min_length=1)
|
|
|
|
def _is_valid_conductors(self,
|
|
conductors:Union[BaseConductor,List[BaseConductor]])->None:
|
|
"""Validation check for 'conductors' variable from main constructor."""
|
|
check_type(
|
|
conductors,
|
|
BaseConductor,
|
|
alt_types=[List],
|
|
hint="MeowRunner.conductors"
|
|
)
|
|
if type(conductors) == list:
|
|
valid_list(conductors, BaseConductor, min_length=1)
|
|
|
|
def _is_valid_job_queue_dir(self, job_queue_dir)->None:
|
|
"""Validation check for 'job_queue_dir' variable from main
|
|
constructor."""
|
|
valid_dir_path(job_queue_dir, must_exist=False)
|
|
if not os.path.exists(job_queue_dir):
|
|
make_dir(job_queue_dir)
|
|
|
|
def _is_valid_job_output_dir(self, job_output_dir)->None:
|
|
"""Validation check for 'job_output_dir' variable from main
|
|
constructor."""
|
|
valid_dir_path(job_output_dir, must_exist=False)
|
|
if not os.path.exists(job_output_dir):
|
|
make_dir(job_output_dir)
|