meow_base/core/base_handler.py

"""
This file contains the base MEOW handler defintion. This should be inherited
from for all handler instances.
Author(s): David Marchant
"""
import os
import stat
from threading import Event, Thread
from typing import Any, Tuple, Dict, Union
from time import sleep
from meow_base.core.vars import VALID_CHANNELS, EVENT_RULE, EVENT_PATH, \
VALID_HANDLER_NAME_CHARS, META_FILE, JOB_ID, JOB_FILE, JOB_PARAMETERS, \
get_drt_imp_msg
from meow_base.core.meow import valid_event
from meow_base.patterns.file_event_pattern import WATCHDOG_HASH
from meow_base.functionality.file_io import threadsafe_write_status, \
threadsafe_update_status, make_dir, write_file, lines_to_string
from meow_base.functionality.validation import check_implementation, \
valid_string, valid_natural
from meow_base.functionality.meow import create_job_metadata_dict, \
replace_keywords
from meow_base.functionality.naming import generate_handler_id


class BaseHandler:
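    """Base class for all MEOW handlers. A handler pulls events from a
    runner, assembles one or more jobs from each event, and sends those jobs
    back to the runner for execution. Child classes must implement
    'valid_handle_criteria', 'get_created_job_type' and
    'create_job_recipe_file'."""
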
    # An identifier for a handler within the runner. Can be manually set in
    # the constructor, or autogenerated if no name is provided.
name:str
# A channel for sending messages to the runner event queue. Note that this
# will be overridden by a MeowRunner, if a handler instance is passed to
# it, and so does not need to be initialised within the handler itself,
# unless the handler is running independently of a runner.
to_runner_event: VALID_CHANNELS
# A channel for sending messages to the runner job queue. Note that this
# will be overridden by a MeowRunner, if a handler instance is passed to
# it, and so does not need to be initialised within the handler itself,
# unless the handler is running independently of a runner.
to_runner_job: VALID_CHANNELS
# Directory where queued jobs are initially written to. Note that this
# will be overridden by a MeowRunner, if a handler instance is passed to
# it, and so does not need to be initialised within the handler itself.
job_queue_dir:str
    # The number of seconds a handler will wait, if told that there are no
    # events in the runner, before polling again. Default is 5 seconds.
    pause_time:int

    def __init__(self, name:str='', pause_time:int=5)->None:
"""BaseHandler Constructor. This will check that any class inheriting
from it implements its validation functions."""
check_implementation(type(self).valid_handle_criteria, BaseHandler)
check_implementation(type(self).get_created_job_type, BaseHandler)
check_implementation(type(self).create_job_recipe_file, BaseHandler)
if not name:
name = generate_handler_id()
self._is_valid_name(name)
self.name = name
self._is_valid_pause_time(pause_time)
self.pause_time = pause_time

    def __new__(cls, *args, **kwargs):
        """A check that this base class is not instantiated itself, only
        inherited from."""
if cls is BaseHandler:
msg = get_drt_imp_msg(BaseHandler)
raise TypeError(msg)
return object.__new__(cls)

    def _is_valid_name(self, name:str)->None:
"""Validation check for 'name' variable from main constructor. Is
automatically called during initialisation. This does not need to be
overridden by child classes."""
valid_string(name, VALID_HANDLER_NAME_CHARS)

    def _is_valid_pause_time(self, pause_time:int)->None:
"""Validation check for 'pause_time' variable from main constructor. Is
automatically called during initialisation. This does not need to be
overridden by child classes."""
valid_natural(pause_time, hint="BaseHandler.pause_time")

    def prompt_runner_for_event(self)->Union[Dict[str,Any],Any]:
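        """Function to request an event from the runner. Sends a prompt down
        the event channel, then waits up to 'pause_time' seconds for a
        response, returning None if nothing is received in that window."""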
self.to_runner_event.send(1)
if self.to_runner_event.poll(self.pause_time):
return self.to_runner_event.recv()
return None

    def send_job_to_runner(self, job_id:str)->None:
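        """Function to send an identifier for a newly created job to the
        runner, via the job channel."""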
self.to_runner_job.send(job_id)

    def start(self)->None:
        """Function to start the handler as an ongoing thread, as defined by
        the main_loop function. Together, these will execute any code in an
        implemented handler's handle function sequentially, but concurrently
        to any other handlers running or other runner operations. This is
        intended as a naive multiprocessing implementation, and any more
        in-depth parallelisation of execution must be implemented by a user
        by overriding this function, and the stop function."""
self._stop_event = Event()
self._handle_thread = Thread(
target=self.main_loop,
args=(self._stop_event,),
daemon=True,
name="handler_thread"
)
self._handle_thread.start()

    def stop(self)->None:
        """Function to stop the handler as an ongoing thread. May be
        overridden by any child class. This function should also be
        overridden if the start function has been."""
self._stop_event.set()
self._handle_thread.join()

    def main_loop(self, stop_event)->None:
        """Function defining an ongoing thread, as started by the start
        function and stopped by the stop function."""
while not stop_event.is_set():
reply = self.prompt_runner_for_event()
            # If we have received 'None' then we have already timed out, so
            # skip this loop and start again
if reply is None:
continue
try:
valid_event(reply)
            except Exception as e:
                # We were not given a valid event, so sleep before trying
                # again
                sleep(self.pause_time)
                continue
try:
self.handle(reply)
except Exception as e:
# TODO some error reporting here
if not isinstance(e, TypeError):
raise e

    def valid_handle_criteria(self, event:Dict[str,Any])->Tuple[bool,str]:
        """Function to determine, given an event definition, if this handler
        can process it or not. Must be implemented by any child class."""
pass

    def handle(self, event:Dict[str,Any])->None:
        """Function to handle a given event. May be overridden by any child
        class. Note that once any handling has occurred, the
        send_job_to_runner function should be called to inform the runner of
        any resultant jobs."""
rule = event[EVENT_RULE]
# Assemble job parameters dict from pattern variables
yaml_dict = {}
for var, val in rule.pattern.parameters.items():
yaml_dict[var] = val
for var, val in rule.pattern.outputs.items():
yaml_dict[var] = val
# yaml_dict[rule.pattern.triggering_file] = event[EVENT_PATH]
# If no parameter sweeps, then one job will suffice
if not rule.pattern.sweep:
self.setup_job(event, yaml_dict)
else:
# If parameter sweeps, then many jobs created
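            # e.g. a sweep over {"n": [1, 2, 3]} should expand into three
            # sets of values, and so three separate jobs, each with a
            # different value bound to "n". expand_sweeps is expected to
            # return one list of (variable, value) tuples per job.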
values_list = rule.pattern.expand_sweeps()
for values in values_list:
for value in values:
yaml_dict[value[0]] = value[1]
self.setup_job(event, yaml_dict)

    def setup_job(self, event:Dict[str,Any], params_dict:Dict[str,Any])->None:
"""Function to set up new job dict and send it to the runner to be
executed."""
# Get base job metadata
meow_job = self.create_job_metadata_dict(event, params_dict)
# Get updated job parameters
# TODO replace this with generic implementation
from meow_base.patterns.file_event_pattern import WATCHDOG_BASE
params_dict = replace_keywords(
params_dict,
meow_job[JOB_ID],
event[EVENT_PATH],
event[WATCHDOG_BASE]
)
# Create a base job directory
job_dir = os.path.join(self.job_queue_dir, meow_job[JOB_ID])
make_dir(job_dir)
# Create job metadata file
meta_file = self.create_job_meta_file(job_dir, meow_job)
# Create job recipe file
recipe_command = self.create_job_recipe_file(job_dir, event, params_dict)
# Create job script file
script_command = self.create_job_script_file(job_dir, recipe_command)
threadsafe_update_status(
{
# TODO make me not tmp variables and update job dict validation
"tmp recipe command": recipe_command,
"tmp script command": script_command
},
meta_file
)
        # Send the job directory, as the actual definitions will be read from
        # within it
self.send_job_to_runner(job_dir)

    def get_created_job_type(self)->str:
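        """Function to get the type of job this handler creates, as recorded
        in each job's metadata. Must be implemented by any child class."""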
        pass # Must be implemented by child class

    def create_job_metadata_dict(self, event:Dict[str,Any],
            params_dict:Dict[str,Any])->Dict[str,Any]:
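        """Function to assemble the standard metadata dict for a job created
        from the given event, storing the assembled parameters under
        JOB_PARAMETERS."""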
return create_job_metadata_dict(
self.get_created_job_type(),
event,
extras={
JOB_PARAMETERS:params_dict
}
)

    def create_job_meta_file(self, job_dir:str, meow_job:Dict[str,Any]
            )->str:
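        """Function to write the job metadata dict to the META_FILE within
        the job directory, returning the path of the written file."""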
meta_file = os.path.join(job_dir, META_FILE)
threadsafe_write_status(meow_job, meta_file)
return meta_file

    def create_job_recipe_file(self, job_dir:str, event:Dict[str,Any],
            params_dict:Dict[str,Any])->str:
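        """Function to write the file(s) defining the recipe for a given job,
        returning the shell command used to invoke them. Must be implemented
        by any child class."""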
        pass # Must be implemented by child class

    def create_job_script_file(self, job_dir:str, recipe_command:str)->str:
        # TODO Make this more generic, so hashes are only checked if one is
        # present in the event
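        """Function to write the executable job script. The script checks
        that the hash of the triggering file still matches the hash recorded
        at scheduling time, then invokes the given recipe command. Returns
        the relative path used to invoke the script."""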
job_script = [
"#!/bin/bash",
"",
"# Get job params",
f"given_hash=$(grep '{WATCHDOG_HASH}: *' $(dirname $0)/job.yml | tail -n1 | cut -c 14-)",
f"event_path=$(grep '{EVENT_PATH}: *' $(dirname $0)/job.yml | tail -n1 | cut -c 15-)",
"",
"echo event_path: $event_path",
"echo given_hash: $given_hash",
"",
"# Check hash of input file to avoid race conditions",
"actual_hash=$(sha256sum $event_path | cut -c -64)",
"echo actual_hash: $actual_hash",
"if [ \"$given_hash\" != \"$actual_hash\" ]; then",
" echo Job was skipped as triggering file has been modified since scheduling",
" exit 134",
"fi",
"",
"# Call actual job script",
recipe_command,
"",
"exit $?"
]
job_file = os.path.join(job_dir, JOB_FILE)
write_file(lines_to_string(job_script), job_file)
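        # Make the script readable and executable by user, group, and other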
os.chmod(job_file, stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH )
return os.path.join(".", JOB_FILE)
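

# Example: a minimal sketch of a concrete handler, for illustration only.
# The "bash" job type, the recipe filename, and the echo recipe below are
# all hypothetical; real handlers define their own job types and recipe
# formats, and typically live outside this module.
class ExampleShellHandler(BaseHandler):
    def valid_handle_criteria(self, event:Dict[str,Any])->Tuple[bool,str]:
        # Accept any structurally valid event; a real handler would also
        # check that the event's rule uses a recipe type it understands
        try:
            valid_event(event)
            return True, ""
        except Exception as e:
            return False, str(e)

    def get_created_job_type(self)->str:
        return "bash"

    def create_job_recipe_file(self, job_dir:str, event:Dict[str,Any],
            params_dict:Dict[str,Any])->str:
        # Write a trivial recipe script into the job directory, and return
        # the command the job script will use to invoke it
        recipe_file = os.path.join(job_dir, "recipe.sh")
        write_file(
            lines_to_string([
                "#!/bin/bash",
                "echo 'hello from a meow job'"
            ]),
            recipe_file
        )
        return f"/bin/bash {recipe_file}"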