added standardised job creation

This commit is contained in:
PatchOfScotland
2023-04-22 21:48:33 +02:00
parent f306d8b6f2
commit d3eb2dbf9f
15 changed files with 515 additions and 1281 deletions

View File

@ -5,15 +5,25 @@ from for all conductor instances.
Author(s): David Marchant
"""
import shutil
import subprocess
import os
from datetime import datetime
from threading import Event, Thread
from time import sleep
from typing import Any, Tuple, Dict, Union
from meow_base.core.meow import valid_job
from meow_base.core.vars import VALID_CONDUCTOR_NAME_CHARS, VALID_CHANNELS, \
JOB_STATUS, JOB_START_TIME, META_FILE, STATUS_RUNNING, STATUS_DONE , \
BACKUP_JOB_ERROR_FILE, JOB_END_TIME, STATUS_FAILED, JOB_ERROR, \
get_drt_imp_msg
from meow_base.functionality.file_io import write_file, \
threadsafe_read_status, threadsafe_update_status
from meow_base.functionality.validation import check_implementation, \
valid_string, valid_existing_dir_path, valid_natural
valid_string, valid_existing_dir_path, valid_natural, valid_dir_path
from meow_base.functionality.naming import generate_conductor_id
@ -40,7 +50,6 @@ class BaseConductor:
def __init__(self, name:str="", pause_time:int=5)->None:
"""BaseConductor Constructor. This will check that any class inheriting
from it implements its validation functions."""
check_implementation(type(self).execute, BaseConductor)
check_implementation(type(self).valid_execute_criteria, BaseConductor)
if not name:
name = generate_conductor_id()
@ -129,7 +138,93 @@ class BaseConductor:
process it or not. Must be implemented by any child process."""
pass
def run_job(self, job_dir:str)->None:
    """Function to actually execute a job. This will read job
    definitions from its meta file, update the meta file and attempt to
    execute. Some unspecific feedback will be given on execution failure,
    but depending on what it is it may be up to the job itself to provide
    more detailed feedback. If you simply wish to alter the conditions
    under which the job is executed, please instead look at the execute
    function.

    :param job_dir: path to an existing directory containing the job's
        meta file and script. The whole directory is moved to
        self.job_output_dir once execution finishes, succeed or fail.
    """
    valid_dir_path(job_dir, must_exist=True)

    # Test our job parameters. Even if they're gibberish, we still move
    # the job to output at the end.
    abort = False
    try:
        meta_file = os.path.join(job_dir, META_FILE)
        job = threadsafe_read_status(meta_file)
        valid_job(job)

        # update the status file with running status
        threadsafe_update_status(
            {
                JOB_STATUS: STATUS_RUNNING,
                JOB_START_TIME: datetime.now()
            },
            meta_file
        )

    except Exception as e:
        # If something has gone wrong at this stage then it's bad, so we
        # need to make our own error file rather than trust the meta file.
        error_file = os.path.join(job_dir, BACKUP_JOB_ERROR_FILE)
        # Typo fixed in persisted message: "Recieved" -> "Received"
        write_file(f"Received incorrectly setup job.\n\n{e}", error_file)
        abort = True

    # execute the job
    if not abort:
        try:
            # NOTE(review): runs from the conductor's cwd, not job_dir —
            # the script path is absolute-ish via join, but any relative
            # paths inside the script resolve against cwd. Confirm intended.
            result = subprocess.call(
                os.path.join(job_dir, job["tmp script command"]),
                cwd="."
            )

            if result == 0:
                # Update the status file with the finalised status
                threadsafe_update_status(
                    {
                        JOB_STATUS: STATUS_DONE,
                        JOB_END_TIME: datetime.now()
                    },
                    meta_file
                )
            else:
                # Update the status file with the error status. Don't
                # overwrite any more specific error messages already
                # created
                threadsafe_update_status(
                    {
                        JOB_STATUS: STATUS_FAILED,
                        JOB_END_TIME: datetime.now(),
                        JOB_ERROR: "Job execution returned non-zero."
                    },
                    meta_file
                )

        except Exception as e:
            # Update the status file with the error status. Don't overwrite
            # any more specific error messages already created
            threadsafe_update_status(
                {
                    JOB_STATUS: STATUS_FAILED,
                    JOB_END_TIME: datetime.now(),
                    JOB_ERROR: f"Job execution failed. {e}"
                },
                meta_file
            )

    # Move the contents of the execution directory to the final output
    # directory.
    job_output_dir = \
        os.path.join(self.job_output_dir, os.path.basename(job_dir))
    shutil.move(job_dir, job_output_dir)
def execute(self, job_dir:str)->None:
    """Function to run job execution. By default this will simply call the
    run_job function, to execute the job locally. However, this function
    may be overridden to execute the job in some other manner, such as on
    another resource. Note that the job itself should be executed using the
    run_job func in order to maintain expected logging etc.

    :param job_dir: path to the job directory to execute.
    """
    # Reconstructed from a diff-merge artifact: the stale abstract stub
    # ("Must be implemented by any child process" + pass) was removed-line
    # residue fused with this added default implementation.
    self.run_job(job_dir)

View File

@ -6,16 +6,23 @@ from for all handler instances.
Author(s): David Marchant
"""
import os
import stat
from threading import Event, Thread
from typing import Any, Tuple, Dict, Union
from time import sleep
from meow_base.core.vars import VALID_CHANNELS, \
VALID_HANDLER_NAME_CHARS, get_drt_imp_msg
from meow_base.core.vars import VALID_CHANNELS, EVENT_RULE, EVENT_PATH, \
VALID_HANDLER_NAME_CHARS, META_FILE, JOB_ID, WATCHDOG_BASE, JOB_FILE, \
JOB_PARAMETERS, get_drt_imp_msg
from meow_base.core.meow import valid_event
from meow_base.functionality.file_io import threadsafe_write_status, \
threadsafe_update_status, make_dir, write_file, lines_to_string
from meow_base.functionality.validation import check_implementation, \
valid_string, valid_natural
from meow_base.functionality.meow import create_job_metadata_dict, \
replace_keywords
from meow_base.functionality.naming import generate_handler_id
class BaseHandler:
@ -42,8 +49,9 @@ class BaseHandler:
def __init__(self, name:str='', pause_time:int=5)->None:
"""BaseHandler Constructor. This will check that any class inheriting
from it implements its validation functions."""
check_implementation(type(self).handle, BaseHandler)
check_implementation(type(self).valid_handle_criteria, BaseHandler)
check_implementation(type(self).get_created_job_type, BaseHandler)
check_implementation(type(self).create_job_recipe_file, BaseHandler)
if not name:
name = generate_handler_id()
self._is_valid_name(name)
@ -137,8 +145,124 @@ class BaseHandler:
pass
def handle(self, event:Dict[str,Any])->None:
    """Function to handle a given event. May be overridden by any child
    process. Note that once any handling has occured, the
    send_job_to_runner function should be called to inform the runner of
    any resultant jobs.

    :param event: event dict; must contain EVENT_RULE and EVENT_PATH
        entries (see valid_event).
    """
    # Reconstructed from a diff-merge artifact: the old abstract docstring
    # and its 'pass' were removed-line residue fused with this added body.
    rule = event[EVENT_RULE]

    # Assemble job parameters dict from pattern variables
    yaml_dict = {}
    for var, val in rule.pattern.parameters.items():
        yaml_dict[var] = val
    for var, val in rule.pattern.outputs.items():
        yaml_dict[var] = val
    yaml_dict[rule.pattern.triggering_file] = event[EVENT_PATH]

    # If no parameter sweeps, then one job will suffice
    if not rule.pattern.sweep:
        self.setup_job(event, yaml_dict)
    else:
        # If parameter sweeps, then many jobs created — one per sweep
        # combination. NOTE(review): nesting inferred, as the scrape
        # stripped indentation; confirm setup_job sits inside the outer
        # loop, after all values for one combination are applied.
        values_list = rule.pattern.expand_sweeps()
        for values in values_list:
            for value in values:
                yaml_dict[value[0]] = value[1]
            self.setup_job(event, yaml_dict)
def setup_job(self, event:Dict[str,Any], params_dict:Dict[str,Any])->None:
    """Assemble a new job from the given event and parameter dict, write
    it out to a fresh job directory, and hand that directory to the runner
    for execution."""
    # Base metadata for the new job
    job_meta = self.create_job_metadata_dict(event, params_dict)

    # Substitute keyword placeholders within the job parameters
    # TODO replace this with generic implementation
    params_dict = replace_keywords(
        params_dict,
        job_meta[JOB_ID],
        event[EVENT_PATH],
        event[WATCHDOG_BASE]
    )

    # Lay out the job directory and the files it needs
    job_dir = os.path.join(self.job_queue_dir, job_meta[JOB_ID])
    make_dir(job_dir)
    meta_file = self.create_job_meta_file(job_dir, job_meta)
    recipe_command = self.create_job_recipe_file(job_dir, event, params_dict)
    script_command = self.create_job_script_file(job_dir, recipe_command)

    # Record the commands in the job's meta file
    threadsafe_update_status(
        {
            # TODO make me not tmp variables and update job dict validation
            "tmp recipe command": recipe_command,
            "tmp script command": script_command
        },
        meta_file
    )

    # Send job directory, as actual definitions will be read from within it
    self.send_job_to_runner(job_dir)
def get_created_job_type(self)->str:
    """Return the type identifier of jobs created by this handler —
    presumably one of the JOB_TYPE_* constants (confirm). Must be
    implemented by any child class (enforced by check_implementation in
    the constructor)."""
    pass # Must be implemented
def create_job_metadata_dict(self, event:Dict[str,Any],
        params_dict:Dict[str,Any])->Dict[str,Any]:
    """Build the base metadata dict for a job triggered by 'event', with
    'params_dict' attached under the job parameters key."""
    extra_entries = {JOB_PARAMETERS: params_dict}
    job_type = self.get_created_job_type()
    return create_job_metadata_dict(job_type, event, extras=extra_entries)
def create_job_meta_file(self, job_dir:str, meow_job:Dict[str,Any]
        )->str:
    """Write the given job metadata dict to the meta file within 'job_dir'
    and return the path to that file.

    Fixed return annotation: the function returns the meta file path (a
    str), not the metadata dict as previously annotated.
    """
    meta_file = os.path.join(job_dir, META_FILE)
    threadsafe_write_status(meow_job, meta_file)
    return meta_file
def create_job_recipe_file(self, job_dir:str, event:Dict[str,Any], params_dict:Dict[str,Any]
        )->str:
    """Create the recipe file for a job within 'job_dir' and return the
    command used to invoke it (fed into create_job_script_file). Must be
    implemented by any child class (enforced by check_implementation in
    the constructor)."""
    pass # Must be implemented
def create_job_script_file(self, job_dir:str, recipe_command:str)->str:
    """Write the executable job script (JOB_FILE) into 'job_dir' and
    return the relative command ("./" + JOB_FILE) used to invoke it.

    The generated bash script re-reads the triggering file's hash from
    job.yml and aborts with exit code 134 if the file has changed since
    scheduling, otherwise it runs 'recipe_command' and propagates its
    exit status.

    NOTE(review): the shell variables ($event_path, $given_hash,
    $actual_hash) are unquoted — paths containing whitespace would break
    the hash check; confirm whether triggering paths can contain spaces.
    """
    # TODO Make this more generic, so only checking hashes if that is present
    job_script = [
        "#!/bin/bash",
        "",
        "# Get job params",
        "given_hash=$(grep 'file_hash: *' $(dirname $0)/job.yml | tail -n1 | cut -c 14-)",
        "event_path=$(grep 'event_path: *' $(dirname $0)/job.yml | tail -n1 | cut -c 15-)",
        "",
        "echo event_path: $event_path",
        "echo given_hash: $given_hash",
        "",
        "# Check hash of input file to avoid race conditions",
        "actual_hash=$(sha256sum $event_path | cut -c -64)",
        "echo actual_hash: $actual_hash",
        "if [ $given_hash != $actual_hash ]; then",
        " echo Job was skipped as triggering file has been modified since scheduling",
        " exit 134",
        "fi",
        "",
        "# Call actual job script",
        recipe_command,
        "",
        "exit $?"
    ]
    job_file = os.path.join(job_dir, JOB_FILE)
    write_file(lines_to_string(job_script), job_file)
    # Read+execute for user/group/other; chmod replaces the mode outright,
    # so the write bit is deliberately cleared.
    os.chmod(job_file, stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH )
    return os.path.join(".", JOB_FILE)

View File

@ -17,11 +17,13 @@ from meow_base.core.base_conductor import BaseConductor
from meow_base.core.base_handler import BaseHandler
from meow_base.core.base_monitor import BaseMonitor
from meow_base.core.vars import DEBUG_WARNING, DEBUG_INFO, \
VALID_CHANNELS, META_FILE, DEFAULT_JOB_OUTPUT_DIR, DEFAULT_JOB_QUEUE_DIR
VALID_CHANNELS, META_FILE, DEFAULT_JOB_OUTPUT_DIR, DEFAULT_JOB_QUEUE_DIR, \
JOB_STATUS, STATUS_QUEUED
from meow_base.functionality.validation import check_type, valid_list, \
valid_dir_path, check_implementation
from meow_base.functionality.debug import setup_debugging, print_debug
from meow_base.functionality.file_io import make_dir, threadsafe_read_status
from meow_base.functionality.file_io import make_dir, threadsafe_read_status, \
threadsafe_update_status
from meow_base.functionality.process_io import wait
@ -183,13 +185,20 @@ class MeowRunner:
message = connection.recv()
# Received an event
# Received a job
if isinstance(component, BaseHandler):
self.job_queue.append(message)
threadsafe_update_status(
{
JOB_STATUS: STATUS_QUEUED
},
os.path.join(message, META_FILE)
)
continue
# Received a request for an event
# Received a request for a job
if isinstance(component, BaseConductor):
valid = False
print(f"Got request for job")
for job_dir in self.job_queue:
try:
metafile = os.path.join(job_dir, META_FILE)

View File

@ -85,6 +85,7 @@ DEFAULT_JOB_QUEUE_DIR = "job_queue"
DEFAULT_JOB_OUTPUT_DIR = "job_output"
# meow jobs
JOB_FILE = "job.sh"
JOB_TYPE = "job_type"
JOB_TYPE_BASH = "bash"
JOB_TYPE_PYTHON = "python"
@ -125,6 +126,7 @@ JOB_REQUIREMENTS = "requirements"
JOB_PARAMETERS = "parameters"
# job statuses
STATUS_CREATING = "creating"
STATUS_QUEUED = "queued"
STATUS_RUNNING = "running"
STATUS_SKIPPED = "skipped"
@ -157,12 +159,3 @@ def get_not_imp_msg(parent_class, class_function):
return f"Children of the '{parent_class.__name__}' class must implement " \
f"the '{class_function.__name__}({signature(class_function)})' " \
"function"
def get_base_file(job_type:str):
    """Return entry 0 of the JOB_TYPES record for 'job_type' — presumably
    the base definition file name (JOB_TYPES is not visible in this chunk;
    confirm its layout). Removed by this commit."""
    return JOB_TYPES[job_type][0]
def get_job_file(job_type:str):
    """Return entry 1 of the JOB_TYPES record for 'job_type' — presumably
    the job file name (JOB_TYPES is not visible in this chunk; confirm its
    layout). Removed by this commit."""
    return JOB_TYPES[job_type][1]
def get_result_file(job_type:str):
    """Return entry 2 of the JOB_TYPES record for 'job_type' — presumably
    the result file name (JOB_TYPES is not visible in this chunk; confirm
    its layout). Removed by this commit."""
    return JOB_TYPES[job_type][2]