added rudimentary conductor for job execution

PatchOfScotland
2023-01-26 13:47:17 +01:00
parent 75de8147be
commit 31d06af5bf
18 changed files with 1895 additions and 545 deletions

View File

@@ -1,10 +1,28 @@
from datetime import datetime
from inspect import signature
from os.path import sep, exists, isfile, isdir, dirname
from typing import Any, _SpecialForm, Union, Tuple, get_origin, get_args
from core.correctness.vars import VALID_PATH_CHARS, get_not_imp_msg, \
EVENT_TYPE, EVENT_PATH
EVENT_TYPE, EVENT_PATH, JOB_EVENT, JOB_TYPE, JOB_ID, JOB_PATTERN, \
JOB_RECIPE, JOB_RULE, JOB_STATUS, JOB_CREATE_TIME
EVENT_KEYS = {
EVENT_TYPE: str,
EVENT_PATH: str
}
JOB_KEYS = {
JOB_TYPE: str,
JOB_EVENT: dict,
JOB_ID: str,
JOB_PATTERN: Any,
JOB_RECIPE: Any,
JOB_RULE: str,
JOB_STATUS: str,
JOB_CREATE_TIME: datetime,
}
def check_type(variable:Any, expected_type:type, alt_types:list[type]=[],
or_none:bool=False)->None:
@@ -42,12 +60,18 @@ def check_type(variable:Any, expected_type:type, alt_types:list[type]=[],
return
if not isinstance(variable, tuple(type_list)):
print("egh")
raise TypeError(
'Expected type(s) are %s, got %s'
% (get_args(expected_type), type(variable))
)
def check_implementation(child_func, parent_class):
if not hasattr(parent_class, child_func.__name__):
raise AttributeError(
f"Parent class {parent_class} does not implement base function "
f"{child_func.__name__} for children to override.")
parent_func = getattr(parent_class, child_func.__name__)
if (child_func == parent_func):
msg = get_not_imp_msg(parent_class, parent_func)
@@ -180,9 +204,15 @@ def setup_debugging(print:Any=None, logging:int=0)->Tuple[Any,int]:
return print, logging
def valid_event(event)->None:
check_type(event, dict)
if not EVENT_TYPE in event.keys():
raise KeyError(f"Events require key '{EVENT_TYPE}'")
if not EVENT_PATH in event.keys():
raise KeyError(f"Events require key '{EVENT_PATH}'")
def valid_meow_dict(meow_dict:dict[str,Any], msg:str, keys:dict[str,type])->None:
check_type(meow_dict, dict)
for key, value_type in keys.items():
if not key in meow_dict.keys():
raise KeyError(f"{msg} require key '{key}'")
check_type(meow_dict[key], value_type)
def valid_event(event:dict[str,Any])->None:
valid_meow_dict(event, "Event", EVENT_KEYS)
def valid_job(job:dict[str,Any])->None:
valid_meow_dict(job, "Job", JOB_KEYS)
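
As a quick illustration of the new generic check (not part of this commit), a hypothetical job dict can be validated against JOB_KEYS. The values below are placeholders, and the sketch assumes check_type treats an expected type of Any as a pass-through (the _SpecialForm import above suggests it does):

from datetime import datetime
from core.correctness.validation import valid_job
from core.correctness.vars import EVENT_PATH, EVENT_TYPE, JOB_CREATE_TIME, \
    JOB_EVENT, JOB_ID, JOB_PATTERN, JOB_RECIPE, JOB_RULE, JOB_STATUS, JOB_TYPE

# Placeholder job dict covering every key in JOB_KEYS.
example_job = {
    JOB_TYPE: "python",
    JOB_EVENT: {EVENT_TYPE: "watchdog", EVENT_PATH: "/tmp/data/sample.txt"},
    JOB_ID: "job_ABCDEFGHIJKLMNOP",
    JOB_PATTERN: None,               # typed as Any in JOB_KEYS
    JOB_RECIPE: None,                # typed as Any in JOB_KEYS
    JOB_RULE: "rule_1",
    JOB_STATUS: "queued",
    JOB_CREATE_TIME: datetime.now(),
}

valid_job(example_job)           # passes: every key in JOB_KEYS is present
# valid_job({JOB_ID: "job_1"})   # would raise KeyError for the first missing key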

View File

@@ -188,11 +188,12 @@ APPENDING_NOTEBOOK = {
}
# meow events
EVENT_TYPE = "meow_event_type"
EVENT_TYPE = "event_type"
EVENT_PATH = "event_path"
WATCHDOG_TYPE = "watchdog"
WATCHDOG_BASE = "monitor_base"
WATCHDOG_RULE = "rule_name"
WATCHDOG_HASH = "file_hash"
# inotify events
FILE_CREATE_EVENT = "file_created"
@@ -223,6 +224,42 @@ DIR_EVENTS = [
DIR_RETROACTIVE_EVENT
]
# meow jobs
JOB_TYPE = "job_type"
PYTHON_TYPE = "python"
PYTHON_FUNC = "func"
PYTHON_EXECUTION_BASE = "exection_base"
PYTHON_OUTPUT_DIR = "output_dir"
# job definitions
JOB_ID = "id"
JOB_EVENT = "event"
JOB_PATTERN = "pattern"
JOB_RECIPE = "recipe"
JOB_RULE = "rule"
JOB_HASH = "hash"
JOB_STATUS = "status"
JOB_CREATE_TIME = "create"
JOB_START_TIME = "start"
JOB_END_TIME = "end"
JOB_ERROR = "error"
JOB_REQUIREMENTS = "requirements"
JOB_PARAMETERS = "parameters"
# job statuses
STATUS_QUEUED = "queued"
STATUS_RUNNING = "running"
STATUS_SKIPPED = "skipped"
STATUS_FAILED = "failed"
STATUS_DONE = "done"
# job definition files
META_FILE = "job.yml"
BASE_FILE = "base.ipynb"
PARAMS_FILE = "params.yml"
JOB_FILE = "job.ipynb"
RESULT_FILE = "result.ipynb"
# debug printing levels
DEBUG_ERROR = 1
DEBUG_WARNING = 2
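
For orientation (not part of this commit), the status and file constants above might be combined with the existing write_yaml helper when persisting job metadata; the per-job directory layout assumed here is a guess:

import os
from core.correctness.vars import JOB_STATUS, META_FILE, STATUS_RUNNING
from core.functionality import write_yaml

def mark_job_running(job_dir: str, job: dict) -> None:
    # Hypothetical sketch: move a job from queued to running and write the
    # updated metadata into the job's META_FILE ("job.yml").
    job[JOB_STATUS] = STATUS_RUNNING
    write_yaml(job, os.path.join(job_dir, META_FILE))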

View File

@@ -6,6 +6,8 @@ import nbformat
import os
import yaml
from datetime import datetime
from multiprocessing.connection import Connection, wait as multi_wait
from multiprocessing.queues import Queue
from papermill.translators import papermill_translators
@@ -16,8 +18,23 @@ from core.correctness.validation import check_type, valid_existing_file_path, \
valid_path
from core.correctness.vars import CHAR_LOWERCASE, CHAR_UPPERCASE, \
VALID_CHANNELS, HASH_BUFFER_SIZE, SHA256, DEBUG_WARNING, DEBUG_INFO, \
EVENT_TYPE, EVENT_PATH
EVENT_TYPE, EVENT_PATH, JOB_EVENT, JOB_TYPE, JOB_ID, JOB_PATTERN, \
JOB_RECIPE, JOB_RULE, WATCHDOG_RULE, JOB_STATUS, STATUS_QUEUED, \
JOB_CREATE_TIME, JOB_REQUIREMENTS
# mig trigger keyword replacements
KEYWORD_PATH = "{PATH}"
KEYWORD_REL_PATH = "{REL_PATH}"
KEYWORD_DIR = "{DIR}"
KEYWORD_REL_DIR = "{REL_DIR}"
KEYWORD_FILENAME = "{FILENAME}"
KEYWORD_PREFIX = "{PREFIX}"
KEYWORD_BASE = "{VGRID}"
KEYWORD_EXTENSION = "{EXTENSION}"
KEYWORD_JOB = "{JOB}"
#TODO Make this guaranteed unique
def generate_id(prefix:str="", length:int=16, existing_ids:list[str]=[],
charset:str=CHAR_UPPERCASE+CHAR_LOWERCASE, attempts:int=24):
random_length = max(length - len(prefix), 0)
@@ -100,19 +117,16 @@ def make_dir(path:str, can_exist:bool=True, ensure_clean:bool=False):
:return: No return
"""
if not os.path.exists(path):
os.mkdir(path)
elif os.path.isfile(path):
raise ValueError('Cannot make directory in %s as it already '
'exists and is a file' % path)
else:
if not can_exist:
if ensure_clean:
rmtree(path)
os.mkdir(path)
else:
raise ValueError("Directory %s already exists. " % path)
if os.path.exists(path):
if os.path.isfile(path):
raise ValueError(
f"Cannot make directory in {path} as it already exists and is "
"a file")
if ensure_clean:
rmtree(path)
os.makedirs(path, exist_ok=can_exist)
def read_yaml(filepath:str):
"""
Reads a file path as a yaml object.
@@ -124,7 +138,7 @@ def read_yaml(filepath:str):
with open(filepath, 'r') as yaml_file:
return yaml.load(yaml_file, Loader=yaml.Loader)
def write_yaml(source:Any, filename:str, mode:str='w'):
def write_yaml(source:Any, filename:str):
"""
Writes a given object to a yaml file.
@@ -134,7 +148,7 @@ def write_yaml(source:Any, filename:str, mode:str='w'):
:return: No return
"""
with open(filename, mode) as param_file:
with open(filename, 'w') as param_file:
yaml.dump(source, param_file, default_flow_style=False)
def read_notebook(filepath:str):
@@ -241,6 +255,51 @@ def print_debug(print_target, debug_level, msg, level)->None:
status = "WARNING"
print(f"{status}: {msg}", file=print_target)
def create_event(event_type:str, path:str, source:dict[Any,Any]={})->dict[Any,Any]:
def replace_keywords(old_dict:dict[str,str], job_id:str, src_path:str,
monitor_base:str)->dict[str,str]:
new_dict = {}
filename = os.path.basename(src_path)
dirname = os.path.dirname(src_path)
relpath = os.path.relpath(src_path, monitor_base)
reldirname = os.path.dirname(relpath)
(prefix, extension) = os.path.splitext(filename)
for var, val in old_dict.items():
if isinstance(val, str):
val = val.replace(KEYWORD_PATH, src_path)
val = val.replace(KEYWORD_REL_PATH, relpath)
val = val.replace(KEYWORD_DIR, dirname)
val = val.replace(KEYWORD_REL_DIR, reldirname)
val = val.replace(KEYWORD_FILENAME, filename)
val = val.replace(KEYWORD_PREFIX, prefix)
val = val.replace(KEYWORD_BASE, monitor_base)
val = val.replace(KEYWORD_EXTENSION, extension)
val = val.replace(KEYWORD_JOB, job_id)
new_dict[var] = val
else:
new_dict[var] = val
return new_dict
def create_event(event_type:str, path:str, source:dict[Any,Any]={}
)->dict[Any,Any]:
return {**source, EVENT_PATH: path, EVENT_TYPE: event_type}
def create_job(job_type:str, event:dict[str,Any], source:dict[Any,Any]={}
)->dict[Any,Any]:
job_dict = {
#TODO compress pattern, recipe, rule?
JOB_ID: generate_id(prefix="job_"),
JOB_EVENT: event,
JOB_TYPE: job_type,
JOB_PATTERN: event[WATCHDOG_RULE].pattern,
JOB_RECIPE: event[WATCHDOG_RULE].recipe,
JOB_RULE: event[WATCHDOG_RULE].name,
JOB_STATUS: STATUS_QUEUED,
JOB_CREATE_TIME: datetime.now(),
JOB_REQUIREMENTS: event[WATCHDOG_RULE].recipe.requirements
}
return {**source, **job_dict}
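
A short sketch (not part of this commit) of the keyword substitution that replace_keywords performs on a parameters dict; the parameter names are placeholders and the expected results assume POSIX-style paths:

from core.functionality import replace_keywords, KEYWORD_FILENAME, \
    KEYWORD_JOB, KEYWORD_PATH

# Placeholder parameters dict; only string values are rewritten.
parameters = {
    "input_file": KEYWORD_PATH,                   # "{PATH}"
    "output_file": f"output/{KEYWORD_FILENAME}",  # "output/{FILENAME}"
    "job_dir": KEYWORD_JOB,                       # "{JOB}"
    "threshold": 0.75,                            # non-strings pass through unchanged
}

filled = replace_keywords(
    parameters,
    job_id="job_ABCDEFGH",
    src_path="/monitor/base/data/sample.txt",
    monitor_base="/monitor/base",
)
# filled["input_file"]  == "/monitor/base/data/sample.txt"
# filled["output_file"] == "output/sample.txt"
# filled["job_dir"]     == "job_ABCDEFGH"
# filled["threshold"]   == 0.75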

View File

@@ -2,6 +2,7 @@
import inspect
import sys
from copy import deepcopy
from typing import Any, Union
from core.correctness.vars import VALID_RECIPE_NAME_CHARS, \
@@ -150,8 +151,8 @@ class BaseMonitor:
check_implementation(type(self).remove_recipe, BaseMonitor)
check_implementation(type(self).get_recipes, BaseMonitor)
check_implementation(type(self).get_rules, BaseMonitor)
self._patterns = patterns
self._recipes = recipes
self._patterns = deepcopy(patterns)
self._recipes = deepcopy(recipes)
self._rules = create_rules(patterns, recipes)
def __new__(cls, *args, **kwargs):
@@ -201,7 +202,8 @@ class BaseMonitor:
class BaseHandler:
def __init__(self) -> None:
to_runner: VALID_CHANNELS
def __init__(self)->None:
check_implementation(type(self).handle, BaseHandler)
check_implementation(type(self).valid_event_types, BaseHandler)
@@ -214,9 +216,22 @@ class BaseHandler:
def valid_event_types(self)->list[str]:
pass
def handle(self, event:Any)->None:
def handle(self, event:dict[str,Any])->None:
pass
class BaseConductor:
def __init__(self)->None:
check_implementation(type(self).execute, BaseConductor)
check_implementation(type(self).valid_job_types, BaseConductor)
def valid_job_types(self)->list[str]:
pass
def execute(self, job:dict[str,Any])->None:
pass
def create_rules(patterns:Union[dict[str,BasePattern],list[BasePattern]],
recipes:Union[dict[str,BaseRecipe],list[BaseRecipe]],
new_rules:list[BaseRule]=[])->dict[str,BaseRule]:
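
To show how the new hook might be used (not part of this commit), a hypothetical conductor could subclass BaseConductor; the class name and returned job type below are illustrative only:

from typing import Any
from core.correctness.vars import JOB_ID, PYTHON_TYPE
from core.meow import BaseConductor

class ExampleLocalConductor(BaseConductor):
    # Hypothetical subclass: both base methods are overridden, so the
    # check_implementation calls in BaseConductor.__init__ pass.
    def valid_job_types(self) -> list[str]:
        return [PYTHON_TYPE]

    def execute(self, job: dict[str, Any]) -> None:
        # A real conductor would run the job's recipe; this stub just reports it.
        print(f"would execute job {job.get(JOB_ID)}")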

View File

@@ -2,36 +2,69 @@
import sys
import threading
from inspect import signature
from multiprocessing import Pipe
from random import randrange
from typing import Any, Union
from core.correctness.vars import DEBUG_WARNING, DEBUG_INFO, EVENT_TYPE, \
VALID_CHANNELS
VALID_CHANNELS, JOB_TYPE, JOB_ID
from core.correctness.validation import setup_debugging, check_type, \
valid_list
from core.functionality import print_debug, wait
from core.meow import BaseHandler, BaseMonitor
from core.meow import BaseHandler, BaseMonitor, BaseConductor
class MeowRunner:
monitors:list[BaseMonitor]
handlers:dict[str:BaseHandler]
from_monitor: list[VALID_CHANNELS]
conductors:dict[str:BaseConductor]
from_monitors: list[VALID_CHANNELS]
from_handlers: list[VALID_CHANNELS]
def __init__(self, monitors:Union[BaseMonitor,list[BaseMonitor]],
handlers:Union[BaseHandler,list[BaseHandler]],
print:Any=sys.stdout, logging:int=0) -> None:
conductors:Union[BaseConductor,list[BaseConductor]],
print:Any=sys.stdout, logging:int=0)->None:
self._is_valid_conductors(conductors)
if not type(conductors) == list:
conductors = [conductors]
self.conductors = {}
for conductor in conductors:
conductor_jobs = conductor.valid_job_types()
if not conductor_jobs:
raise ValueError(
"Cannot start runner with conductor that does not "
f"implement '{BaseConductor.valid_job_types.__name__}"
f"({signature(BaseConductor.valid_job_types)})' and "
"return a list of at least one conductable job.")
for job in conductor_jobs:
if job in self.conductors.keys():
self.conductors[job].append(conductor)
else:
self.conductors[job] = [conductor]
self._is_valid_handlers(handlers)
if not type(handlers) == list:
handlers = [handlers]
self.handlers = {}
self.from_handlers = []
for handler in handlers:
handler_events = handler.valid_event_types()
if not handler_events:
raise ValueError(
"Cannot start runner with handler that does not "
f"implement '{BaseHandler.valid_event_types.__name__}"
f"({signature(BaseHandler.valid_event_types)})' and "
"return a list of at least one handlable event.")
for event in handler_events:
if event in self.handlers.keys():
self.handlers[event].append(handler)
else:
self.handlers[event] = [handler]
handler_to_runner_reader, handler_to_runner_writer = Pipe()
handler.to_runner = handler_to_runner_writer
self.from_handlers.append(handler_to_runner_reader)
self._is_valid_monitors(monitors)
if not type(monitors) == list:
@@ -43,16 +76,20 @@ class MeowRunner:
monitor.to_runner = monitor_to_runner_writer
self.from_monitors.append(monitor_to_runner_reader)
self._stop_pipe = Pipe()
self._worker = None
self._stop_mon_han_pipe = Pipe()
self._mon_han_worker = None
self._stop_han_con_pipe = Pipe()
self._han_con_worker = None
self._print_target, self.debug_level = setup_debugging(print, logging)
def run(self)->None:
all_inputs = self.from_monitors + [self._stop_pipe[0]]
def run_monitor_handler_interaction(self)->None:
all_inputs = self.from_monitors + [self._stop_mon_han_pipe[0]]
while True:
ready = wait(all_inputs)
if self._stop_pipe[0] in ready:
if self._stop_mon_han_pipe[0] in ready:
return
else:
for from_monitor in self.from_monitors:
@@ -62,7 +99,8 @@ class MeowRunner:
if not self.handlers[event[EVENT_TYPE]]:
print_debug(self._print_target, self.debug_level,
"Could not process event as no relevent "
f"handler for '{EVENT_TYPE}'", DEBUG_INFO)
f"handler for '{event[EVENT_TYPE]}'",
DEBUG_INFO)
return
if len(self.handlers[event[EVENT_TYPE]]) == 1:
self.handlers[event[EVENT_TYPE]][0].handle(event)
@@ -71,6 +109,44 @@ class MeowRunner:
randrange(len(self.handlers[event[EVENT_TYPE]]))
].handle(event)
def run_handler_conductor_interaction(self)->None:
all_inputs = self.from_handlers + [self._stop_han_con_pipe[0]]
while True:
ready = wait(all_inputs)
if self._stop_han_con_pipe[0] in ready:
return
else:
for from_handler in self.from_handlers:
if from_handler in ready:
message = from_handler.recv()
job = message
if not self.conductors[job[JOB_TYPE]]:
print_debug(self._print_target, self.debug_level,
"Could not process job as no relevent "
f"conductor for '{job[JOB_TYPE]}'", DEBUG_INFO)
return
if len(self.conductors[job[JOB_TYPE]]) == 1:
conductor = self.conductors[job[JOB_TYPE]][0]
self.execute_job(conductor, job)
else:
conductor = self.conductors[job[JOB_TYPE]][
randrange(len(self.conductors[job[JOB_TYPE]]))
]
self.execute_job(conductor, job)
def execute_job(self, conductor:BaseConductor, job:dict[str:Any])->None:
print_debug(self._print_target, self.debug_level,
f"Starting execution for job: '{job[JOB_ID]}'", DEBUG_INFO)
try:
conductor.execute(job)
print_debug(self._print_target, self.debug_level,
f"Completed execution for job: '{job[JOB_ID]}'", DEBUG_INFO)
except Exception as e:
print_debug(self._print_target, self.debug_level,
"Something went wrong during execution for job "
f"'{job[JOB_ID]}'. {e}", DEBUG_INFO)
def start(self)->None:
for monitor in self.monitors:
monitor.start()
@@ -79,23 +155,42 @@ class MeowRunner:
for handler in handler_list:
if hasattr(handler, "start") and handler not in startable:
startable.append()
for handler in startable:
handler.start()
if self._worker is None:
self._worker = threading.Thread(
target=self.run,
for conductor_list in self.conductors.values():
for conductor in conductor_list:
if hasattr(conductor, "start") and conductor not in startable:
startable.append()
for starting in startable:
starting.start()
if self._mon_han_worker is None:
self._mon_han_worker = threading.Thread(
target=self.run_monitor_handler_interaction,
args=[])
self._worker.daemon = True
self._worker.start()
self._mon_han_worker.daemon = True
self._mon_han_worker.start()
print_debug(self._print_target, self.debug_level,
"Starting MeowRunner run...", DEBUG_INFO)
"Starting MeowRunner event handling...", DEBUG_INFO)
else:
msg = "Repeated calls to start have no effect."
msg = "Repeated calls to start MeowRunner event handling have " \
"no effect."
print_debug(self._print_target, self.debug_level,
msg, DEBUG_WARNING)
raise RuntimeWarning(msg)
if self._han_con_worker is None:
self._han_con_worker = threading.Thread(
target=self.run_handler_conductor_interaction,
args=[])
self._han_con_worker.daemon = True
self._han_con_worker.start()
print_debug(self._print_target, self.debug_level,
"Starting MeowRunner job conducting...", DEBUG_INFO)
else:
msg = "Repeated calls to start MeowRunner job conducting have " \
"no effect."
print_debug(self._print_target, self.debug_level,
msg, DEBUG_WARNING)
raise RuntimeWarning(msg)
def stop(self)->None:
for monitor in self.monitors:
@@ -106,29 +201,49 @@ class MeowRunner:
for handler in handler_list:
if hasattr(handler, "stop") and handler not in stopable:
stopable.append()
for handler in stopable:
handler.stop()
for conductor_list in self.conductors.values():
for conductor in conductor_list:
if hasattr(conductor, "stop") and conductor not in stopable:
stopable.append()
for stopping in stopable:
stopping.stop()
if self._worker is None:
msg = "Cannot stop thread that is not started."
if self._mon_han_worker is None:
msg = "Cannot stop event handling thread that is not started."
print_debug(self._print_target, self.debug_level,
msg, DEBUG_WARNING)
raise RuntimeWarning(msg)
else:
self._stop_pipe[1].send(1)
self._worker.join()
self._stop_mon_han_pipe[1].send(1)
self._mon_han_worker.join()
print_debug(self._print_target, self.debug_level,
"Worker thread stopped", DEBUG_INFO)
"Event handler thread stopped", DEBUG_INFO)
if self._han_con_worker is None:
msg = "Cannot stop job conducting thread that is not started."
print_debug(self._print_target, self.debug_level,
msg, DEBUG_WARNING)
raise RuntimeWarning(msg)
else:
self._stop_han_con_pipe[1].send(1)
self._han_con_worker.join()
print_debug(self._print_target, self.debug_level,
"Job conductor thread stopped", DEBUG_INFO)
def _is_valid_monitors(self,
monitors:Union[BaseMonitor,list[BaseMonitor]])->None:
check_type(monitors, BaseMonitor, alt_types=[list[BaseMonitor]])
check_type(monitors, BaseMonitor, alt_types=[list])
if type(monitors) == list:
valid_list(monitors, BaseMonitor, min_length=1)
def _is_valid_handlers(self,
handlers:Union[BaseHandler,list[BaseHandler]])->None:
check_type(handlers, BaseHandler, alt_types=[list[BaseHandler]])
check_type(handlers, BaseHandler, alt_types=[list])
if type(handlers) == list:
valid_list(handlers, BaseHandler, min_length=1)
def _is_valid_conductors(self,
conductors:Union[BaseConductor,list[BaseConductor]])->None:
check_type(conductors, BaseConductor, alt_types=[list])
if type(conductors) == list:
valid_list(conductors, BaseConductor, min_length=1)
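
Finally, a hedged sketch of how the extended constructor might be wired up; SomeMonitor, SomeHandler and SomeConductor are stand-ins for concrete subclasses of the base classes and are not defined in this diff, and imports of MeowRunner and those classes are omitted:

import sys
from core.correctness.vars import DEBUG_INFO

# Hypothetical wiring sketch showing the new third constructor argument.
runner = MeowRunner(
    SomeMonitor(patterns, recipes),   # or a list of monitors
    SomeHandler(),                    # or a list of handlers
    SomeConductor(),                  # one or more conductors
    print=sys.stdout,
    logging=DEBUG_INFO,
)
runner.start()   # spawns the monitor->handler and handler->conductor worker threads
# ... jobs are handled and conducted while the runner is live ...
runner.stop()    # stops all components and joins both worker threads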