added standardised job creation
This commit is contained in:
@ -5,15 +5,25 @@ from for all conductor instances.
|
||||
|
||||
Author(s): David Marchant
|
||||
"""
|
||||
import shutil
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
from datetime import datetime
|
||||
from threading import Event, Thread
|
||||
from time import sleep
|
||||
from typing import Any, Tuple, Dict, Union
|
||||
|
||||
|
||||
from meow_base.core.meow import valid_job
|
||||
from meow_base.core.vars import VALID_CONDUCTOR_NAME_CHARS, VALID_CHANNELS, \
|
||||
JOB_STATUS, JOB_START_TIME, META_FILE, STATUS_RUNNING, STATUS_DONE , \
|
||||
BACKUP_JOB_ERROR_FILE, JOB_END_TIME, STATUS_FAILED, JOB_ERROR, \
|
||||
get_drt_imp_msg
|
||||
from meow_base.functionality.file_io import write_file, \
|
||||
threadsafe_read_status, threadsafe_update_status
|
||||
from meow_base.functionality.validation import check_implementation, \
|
||||
valid_string, valid_existing_dir_path, valid_natural
|
||||
valid_string, valid_existing_dir_path, valid_natural, valid_dir_path
|
||||
from meow_base.functionality.naming import generate_conductor_id
|
||||
|
||||
|
||||
@ -40,7 +50,6 @@ class BaseConductor:
|
||||
def __init__(self, name:str="", pause_time:int=5)->None:
|
||||
"""BaseConductor Constructor. This will check that any class inheriting
|
||||
from it implements its validation functions."""
|
||||
check_implementation(type(self).execute, BaseConductor)
|
||||
check_implementation(type(self).valid_execute_criteria, BaseConductor)
|
||||
if not name:
|
||||
name = generate_conductor_id()
|
||||
@ -129,7 +138,93 @@ class BaseConductor:
|
||||
process it or not. Must be implemented by any child process."""
|
||||
pass
|
||||
|
||||
def run_job(self, job_dir:str)->None:
    """Run the job stored in the given job directory on this machine.

    The job definition is read from the meta file inside job_dir and
    validated, and the meta file is marked as running. The command stored
    under the job's 'tmp script command' entry is then run as a subprocess.
    The meta file is updated with the outcome: done on a zero return code,
    failed on a non-zero return code or if execution raised an exception.
    If the job could not be read, validated or marked as running, a backup
    error file is written into job_dir instead and nothing is executed. In
    all of these cases the job directory is finally moved into this
    conductor's job output directory. If you simply wish to alter the
    conditions under which the job is executed, look at the execute
    function instead."""
    valid_dir_path(job_dir, must_exist=True)

    # Check the job definition first. A malformed job is not raised to the
    # caller, as its directory must still be moved to the output below.
    try:
        meta_file = os.path.join(job_dir, META_FILE)
        job = threadsafe_read_status(meta_file)
        valid_job(job)

        # Record in the meta file that the job has started
        started = {
            JOB_STATUS: STATUS_RUNNING,
            JOB_START_TIME: datetime.now()
        }
        threadsafe_update_status(started, meta_file)
    except Exception as setup_error:
        # Something is wrong with the job itself, so report the problem
        # through a separate error file rather than the meta file
        backup_file = os.path.join(job_dir, BACKUP_JOB_ERROR_FILE)
        write_file(
            f"Recieved incorrectly setup job.\n\n{setup_error}",
            backup_file
        )
        runnable = False
    else:
        runnable = True

    if runnable:
        try:
            exit_code = subprocess.call(
                os.path.join(job_dir, job["tmp script command"]),
                cwd="."
            )

            # Work out the final status from the return code
            if exit_code == 0:
                outcome = {
                    JOB_STATUS: STATUS_DONE,
                    JOB_END_TIME: datetime.now()
                }
            else:
                outcome = {
                    JOB_STATUS: STATUS_FAILED,
                    JOB_END_TIME: datetime.now(),
                    JOB_ERROR: "Job execution returned non-zero."
                }
            threadsafe_update_status(outcome, meta_file)

        except Exception as run_error:
            # Execution, or recording its result, raised. Record a generic
            # failure along with the exception text.
            threadsafe_update_status(
                {
                    JOB_STATUS: STATUS_FAILED,
                    JOB_END_TIME: datetime.now(),
                    JOB_ERROR: f"Job execution failed. {run_error}"
                },
                meta_file
            )

    # Move the whole job directory, under its own name, into the final
    # output directory.
    destination = os.path.join(
        self.job_output_dir,
        os.path.basename(job_dir)
    )
    shutil.move(job_dir, destination)
|
||||
|
||||
def execute(self, job_dir:str)->None:
    """Function to run job execution. By default this will simply call the
    run_job function, to execute the job locally. However, this function
    may be overridden to execute the job in some other manner, such as on
    another resource. Note that the job itself should be executed using the
    run_job func in order to maintain expected logging etc."""
    self.run_job(job_dir)
|
@ -6,16 +6,23 @@ from for all handler instances.
|
||||
Author(s): David Marchant
|
||||
"""
|
||||
|
||||
import os
|
||||
import stat
|
||||
|
||||
from threading import Event, Thread
|
||||
from typing import Any, Tuple, Dict, Union
|
||||
from time import sleep
|
||||
|
||||
from meow_base.core.vars import VALID_CHANNELS, \
|
||||
VALID_HANDLER_NAME_CHARS, get_drt_imp_msg
|
||||
from meow_base.core.vars import VALID_CHANNELS, EVENT_RULE, EVENT_PATH, \
|
||||
VALID_HANDLER_NAME_CHARS, META_FILE, JOB_ID, WATCHDOG_BASE, JOB_FILE, \
|
||||
JOB_PARAMETERS, get_drt_imp_msg
|
||||
from meow_base.core.meow import valid_event
|
||||
from meow_base.functionality.file_io import threadsafe_write_status, \
|
||||
threadsafe_update_status, make_dir, write_file, lines_to_string
|
||||
from meow_base.functionality.validation import check_implementation, \
|
||||
valid_string, valid_natural
|
||||
from meow_base.functionality.meow import create_job_metadata_dict, \
|
||||
replace_keywords
|
||||
from meow_base.functionality.naming import generate_handler_id
|
||||
|
||||
class BaseHandler:
|
||||
@ -42,8 +49,9 @@ class BaseHandler:
|
||||
def __init__(self, name:str='', pause_time:int=5)->None:
|
||||
"""BaseHandler Constructor. This will check that any class inheriting
|
||||
from it implements its validation functions."""
|
||||
check_implementation(type(self).handle, BaseHandler)
|
||||
check_implementation(type(self).valid_handle_criteria, BaseHandler)
|
||||
check_implementation(type(self).get_created_job_type, BaseHandler)
|
||||
check_implementation(type(self).create_job_recipe_file, BaseHandler)
|
||||
if not name:
|
||||
name = generate_handler_id()
|
||||
self._is_valid_name(name)
|
||||
@ -137,8 +145,124 @@ class BaseHandler:
|
||||
pass
|
||||
|
||||
def handle(self, event:Dict[str,Any])->None:
    """Function to handle a given event. May be overridden by any child
    process. Note that once any handling has occurred, the
    send_job_to_runner function should be called to inform the runner of
    any resultant jobs.

    By default the job parameters are assembled from the parameters and
    outputs of the pattern of the rule attached to the event, plus the
    event path stored under the pattern's triggering file variable. If the
    pattern has no sweep then a single job is set up, otherwise one job is
    set up per entry returned by the pattern's expand_sweeps function."""
    rule = event[EVENT_RULE]
    pattern = rule.pattern

    # Assemble job parameters dict from pattern variables. Outputs are
    # applied after parameters, so win on any shared name.
    yaml_dict = {}
    yaml_dict.update(pattern.parameters)
    yaml_dict.update(pattern.outputs)
    yaml_dict[pattern.triggering_file] = event[EVENT_PATH]

    # If no parameter sweeps, then one job will suffice
    if not pattern.sweep:
        self.setup_job(event, yaml_dict)
        return

    # If parameter sweeps, then many jobs created
    for values in pattern.expand_sweeps():
        for value in values:
            yaml_dict[value[0]] = value[1]
        # Hand each job its own copy, so that later sweep iterations
        # cannot alter parameters that an earlier job still refers to
        self.setup_job(event, dict(yaml_dict))
|
||||
|
||||
def setup_job(self, event:Dict[str,Any], params_dict:Dict[str,Any])->None:
    """Function to build a new job from an event and its parameters, and
    send it to the runner to be executed. This creates the job directory
    within the job queue directory, along with the job's meta file, recipe
    file and script file, and records the recipe and script commands in
    the meta file."""

    # Base metadata describing the job
    job_meta = self.create_job_metadata_dict(event, params_dict)
    job_id = job_meta[JOB_ID]

    # Job parameters with any keywords replaced
    # TODO replace this with generic implementation
    resolved_params = replace_keywords(
        params_dict,
        job_id,
        event[EVENT_PATH],
        event[WATCHDOG_BASE]
    )

    # Directory that will hold everything belonging to this job
    job_dir = os.path.join(self.job_queue_dir, job_id)
    make_dir(job_dir)

    # Files making up the job: metadata, then recipe, then wrapper script
    meta_file = self.create_job_meta_file(job_dir, job_meta)
    recipe_command = self.create_job_recipe_file(
        job_dir,
        event,
        resolved_params
    )
    script_command = self.create_job_script_file(job_dir, recipe_command)

    # TODO make me not tmp variables and update job dict validation
    commands = {
        "tmp recipe command": recipe_command,
        "tmp script command": script_command
    }
    threadsafe_update_status(commands, meta_file)

    # Send job directory, as actual definitions will be read from within it
    self.send_job_to_runner(job_dir)
|
||||
|
||||
def get_created_job_type(self)->str:
    """Function to get the type of job created by this handler. This base
    version is only a placeholder which returns nothing. Must be
    implemented by any child class."""
    return None
|
||||
|
||||
def create_job_metadata_dict(self, event:Dict[str,Any],
        params_dict:Dict[str,Any])->Dict[str,Any]:
    """Function to create the base metadata dict for a new job. The job
    type is taken from get_created_job_type, and the given parameters are
    passed along as an extra entry under JOB_PARAMETERS."""
    job_type = self.get_created_job_type()
    extras = {
        JOB_PARAMETERS: params_dict
    }
    # Within the class this name is the imported module-level function of
    # the same name, not this method
    return create_job_metadata_dict(job_type, event, extras=extras)
|
||||
|
||||
def create_job_meta_file(self, job_dir:str, meow_job:Dict[str,Any]
        )->str:
    """Function to write the given job metadata dict to the meta file
    within the given job directory. Returns the path of the meta file that
    was written, not the metadata dict itself."""
    meta_file = os.path.join(job_dir, META_FILE)

    threadsafe_write_status(meow_job, meta_file)

    return meta_file
|
||||
|
||||
def create_job_recipe_file(self, job_dir:str, event:Dict[str,Any],
        params_dict:Dict[str,Any])->str:
    """Function to create the recipe file for a job within the given job
    directory. The returned string is used by setup_job as the recipe
    command which is passed on to create_job_script_file. This base
    version is only a placeholder which returns nothing. Must be
    implemented by any child class."""
    return None
|
||||
|
||||
def create_job_script_file(self, job_dir:str, recipe_command:str)->str:
    """Function to write the bash script used to start a job, and make it
    executable.

    The script reads file_hash and event_path from the job.yml file in its
    own directory, compares the recorded hash against the current sha256
    of the event path, and exits with code 134 if they differ. Otherwise
    it runs the given recipe command and exits with that command's status.
    The script is written to JOB_FILE within job_dir, with read and
    execute permission for user, group and others. Returns the path of the
    script relative to the job directory."""
    # TODO Make this more generic, so only checking hashes if that is present
    preamble = [
        "#!/bin/bash",
        "",
        "# Get job params",
        "given_hash=$(grep 'file_hash: *' $(dirname $0)/job.yml | tail -n1 | cut -c 14-)",
        "event_path=$(grep 'event_path: *' $(dirname $0)/job.yml | tail -n1 | cut -c 15-)",
        "",
        "echo event_path: $event_path",
        "echo given_hash: $given_hash",
        "",
        "# Check hash of input file to avoid race conditions",
        "actual_hash=$(sha256sum $event_path | cut -c -64)",
        "echo actual_hash: $actual_hash",
        "if [ $given_hash != $actual_hash ]; then",
        "    echo Job was skipped as triggering file has been modified since scheduling",
        "    exit 134",
        "fi",
        "",
        "# Call actual job script",
    ]
    script_lines = preamble + [recipe_command, "", "exit $?"]

    script_path = os.path.join(job_dir, JOB_FILE)
    write_file(lines_to_string(script_lines), script_path)

    # Readable and executable by everyone, writable by nobody
    read_bits = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    os.chmod(script_path, exec_bits | read_bits)

    return os.path.join(".", JOB_FILE)
|
@ -17,11 +17,13 @@ from meow_base.core.base_conductor import BaseConductor
|
||||
from meow_base.core.base_handler import BaseHandler
|
||||
from meow_base.core.base_monitor import BaseMonitor
|
||||
from meow_base.core.vars import DEBUG_WARNING, DEBUG_INFO, \
|
||||
VALID_CHANNELS, META_FILE, DEFAULT_JOB_OUTPUT_DIR, DEFAULT_JOB_QUEUE_DIR
|
||||
VALID_CHANNELS, META_FILE, DEFAULT_JOB_OUTPUT_DIR, DEFAULT_JOB_QUEUE_DIR, \
|
||||
JOB_STATUS, STATUS_QUEUED
|
||||
from meow_base.functionality.validation import check_type, valid_list, \
|
||||
valid_dir_path, check_implementation
|
||||
from meow_base.functionality.debug import setup_debugging, print_debug
|
||||
from meow_base.functionality.file_io import make_dir, threadsafe_read_status
|
||||
from meow_base.functionality.file_io import make_dir, threadsafe_read_status, \
|
||||
threadsafe_update_status
|
||||
from meow_base.functionality.process_io import wait
|
||||
|
||||
|
||||
@ -183,13 +185,20 @@ class MeowRunner:
|
||||
|
||||
message = connection.recv()
|
||||
|
||||
# Recieved an event
|
||||
# Received a job
|
||||
if isinstance(component, BaseHandler):
|
||||
self.job_queue.append(message)
|
||||
threadsafe_update_status(
|
||||
{
|
||||
JOB_STATUS: STATUS_QUEUED
|
||||
},
|
||||
os.path.join(message, META_FILE)
|
||||
)
|
||||
continue
|
||||
# Recieved a request for an event
|
||||
# Received a request for a job
|
||||
if isinstance(component, BaseConductor):
|
||||
valid = False
|
||||
print(f"Got request for job")
|
||||
for job_dir in self.job_queue:
|
||||
try:
|
||||
metafile = os.path.join(job_dir, META_FILE)
|
||||
|
11
core/vars.py
11
core/vars.py
@ -85,6 +85,7 @@ DEFAULT_JOB_QUEUE_DIR = "job_queue"
|
||||
DEFAULT_JOB_OUTPUT_DIR = "job_output"
|
||||
|
||||
# meow jobs
|
||||
JOB_FILE = "job.sh"
|
||||
JOB_TYPE = "job_type"
|
||||
JOB_TYPE_BASH = "bash"
|
||||
JOB_TYPE_PYTHON = "python"
|
||||
@ -125,6 +126,7 @@ JOB_REQUIREMENTS = "requirements"
|
||||
JOB_PARAMETERS = "parameters"
|
||||
|
||||
# job statuses
|
||||
STATUS_CREATING = "creating"
|
||||
STATUS_QUEUED = "queued"
|
||||
STATUS_RUNNING = "running"
|
||||
STATUS_SKIPPED = "skipped"
|
||||
@ -157,12 +159,3 @@ def get_not_imp_msg(parent_class, class_function):
|
||||
return f"Children of the '{parent_class.__name__}' class must implement " \
|
||||
f"the '{class_function.__name__}({signature(class_function)})' " \
|
||||
"function"
|
||||
|
||||
def get_base_file(job_type:str):
    """Return the first entry of the JOB_TYPES record for the given job
    type (presumably the base file name; confirm against JOB_TYPES)."""
    type_record = JOB_TYPES[job_type]
    return type_record[0]
|
||||
|
||||
def get_job_file(job_type:str):
    """Return the second entry of the JOB_TYPES record for the given job
    type (presumably the job file name; confirm against JOB_TYPES)."""
    type_record = JOB_TYPES[job_type]
    return type_record[1]
|
||||
|
||||
def get_result_file(job_type:str):
    """Return the third entry of the JOB_TYPES record for the given job
    type (presumably the result file name; confirm against JOB_TYPES)."""
    type_record = JOB_TYPES[job_type]
    return type_record[2]
|
||||
|
Reference in New Issue
Block a user