reformted jobs being passed to conductors so they only get a job directory and have to read the definitions from the appropriate files
This commit is contained in:
@ -12,14 +12,14 @@ import sys
|
||||
from typing import Any, Tuple, Dict
|
||||
|
||||
from core.correctness.validation import check_type, valid_string, \
|
||||
valid_dict, valid_path, valid_existing_dir_path, setup_debugging, \
|
||||
valid_dict, valid_path, valid_dir_path, setup_debugging, \
|
||||
valid_event
|
||||
from core.correctness.vars import VALID_VARIABLE_NAME_CHARS, PYTHON_FUNC, \
|
||||
DEBUG_INFO, EVENT_TYPE_WATCHDOG, JOB_HASH, PYTHON_EXECUTION_BASE, \
|
||||
DEBUG_INFO, EVENT_TYPE_WATCHDOG, JOB_HASH, DEFAULT_JOB_QUEUE_DIR, \
|
||||
EVENT_PATH, JOB_TYPE_PAPERMILL, WATCHDOG_HASH, JOB_PARAMETERS, \
|
||||
PYTHON_OUTPUT_DIR, JOB_ID, WATCHDOG_BASE, META_FILE, \
|
||||
JOB_ID, WATCHDOG_BASE, META_FILE, \
|
||||
PARAMS_FILE, JOB_STATUS, STATUS_QUEUED, EVENT_RULE, EVENT_TYPE, \
|
||||
EVENT_RULE, get_base_file, get_job_file, get_result_file
|
||||
EVENT_RULE, get_base_file
|
||||
from core.functionality import print_debug, create_job, replace_keywords, \
|
||||
make_dir, write_yaml, write_notebook, read_notebook
|
||||
from core.meow import BaseRecipe, BaseHandler
|
||||
@ -44,7 +44,7 @@ class JupyterNotebookRecipe(BaseRecipe):
|
||||
def _is_valid_recipe(self, recipe:Dict[str,Any])->None:
|
||||
"""Validation check for 'recipe' variable from main constructor.
|
||||
Called within parent BaseRecipe constructor."""
|
||||
check_type(recipe, Dict)
|
||||
check_type(recipe, Dict, hint="JupyterNotebookRecipe.recipe")
|
||||
nbformat.validate(recipe)
|
||||
|
||||
def _is_valid_parameters(self, parameters:Dict[str,Any])->None:
|
||||
@ -62,26 +62,20 @@ class JupyterNotebookRecipe(BaseRecipe):
|
||||
valid_string(k, VALID_VARIABLE_NAME_CHARS)
|
||||
|
||||
class PapermillHandler(BaseHandler):
|
||||
# handler directory to setup jobs in
|
||||
handler_base:str
|
||||
# TODO move me to conductor?
|
||||
# Final location for job output to be placed
|
||||
output_dir:str
|
||||
# Config option, above which debug messages are ignored
|
||||
debug_level:int
|
||||
# Where print messages are sent
|
||||
_print_target:Any
|
||||
def __init__(self, handler_base:str, output_dir:str, print:Any=sys.stdout,
|
||||
logging:int=0)->None:
|
||||
def __init__(self, job_queue_dir:str=DEFAULT_JOB_QUEUE_DIR,
|
||||
print:Any=sys.stdout, logging:int=0)->None:
|
||||
"""PapermillHandler Constructor. This creats jobs to be executed using
|
||||
the papermill module. This does not run as a continuous thread to
|
||||
handle execution, but is invoked according to a factory pattern using
|
||||
the handle function."""
|
||||
the handle function. Note that if this handler is given to a MeowRunner
|
||||
object, the job_queue_dir will be overwridden."""
|
||||
super().__init__()
|
||||
self._is_valid_handler_base(handler_base)
|
||||
self.handler_base = handler_base
|
||||
self._is_valid_output_dir(output_dir)
|
||||
self.output_dir = output_dir
|
||||
self._is_valid_job_queue_dir(job_queue_dir)
|
||||
self.job_queue_dir = job_queue_dir
|
||||
self._print_target, self.debug_level = setup_debugging(print, logging)
|
||||
print_debug(self._print_target, self.debug_level,
|
||||
"Created new PapermillHandler instance", DEBUG_INFO)
|
||||
@ -125,15 +119,12 @@ class PapermillHandler(BaseHandler):
|
||||
pass
|
||||
return False, str(e)
|
||||
|
||||
def _is_valid_handler_base(self, handler_base)->None:
|
||||
"""Validation check for 'handler_base' variable from main
|
||||
def _is_valid_job_queue_dir(self, job_queue_dir)->None:
|
||||
"""Validation check for 'job_queue_dir' variable from main
|
||||
constructor."""
|
||||
valid_existing_dir_path(handler_base)
|
||||
|
||||
def _is_valid_output_dir(self, output_dir)->None:
|
||||
"""Validation check for 'output_dir' variable from main
|
||||
constructor."""
|
||||
valid_existing_dir_path(output_dir, allow_base=True)
|
||||
valid_dir_path(job_queue_dir, must_exist=False)
|
||||
if not os.path.exists(job_queue_dir):
|
||||
make_dir(job_queue_dir)
|
||||
|
||||
def setup_job(self, event:Dict[str,Any], yaml_dict:Dict[str,Any])->None:
|
||||
"""Function to set up new job dict and send it to the runner to be
|
||||
@ -145,8 +136,6 @@ class PapermillHandler(BaseHandler):
|
||||
JOB_PARAMETERS:yaml_dict,
|
||||
JOB_HASH: event[WATCHDOG_HASH],
|
||||
PYTHON_FUNC:papermill_job_func,
|
||||
PYTHON_OUTPUT_DIR:self.output_dir,
|
||||
PYTHON_EXECUTION_BASE:self.handler_base
|
||||
}
|
||||
)
|
||||
print_debug(self._print_target, self.debug_level,
|
||||
@ -162,8 +151,7 @@ class PapermillHandler(BaseHandler):
|
||||
)
|
||||
|
||||
# Create a base job directory
|
||||
job_dir = os.path.join(
|
||||
meow_job[PYTHON_EXECUTION_BASE], meow_job[JOB_ID])
|
||||
job_dir = os.path.join(self.job_queue_dir, meow_job[JOB_ID])
|
||||
make_dir(job_dir)
|
||||
|
||||
# write a status file to the job directory
|
||||
@ -187,7 +175,7 @@ class PapermillHandler(BaseHandler):
|
||||
self.to_runner.send(job_dir)
|
||||
|
||||
# Papermill job execution code, to be run within the conductor
|
||||
def papermill_job_func(job):
|
||||
def papermill_job_func(job_dir):
|
||||
# Requires own imports as will be run in its own execution environment
|
||||
import os
|
||||
import papermill
|
||||
@ -197,11 +185,11 @@ def papermill_job_func(job):
|
||||
from core.correctness.vars import JOB_EVENT, JOB_ID, \
|
||||
EVENT_PATH, META_FILE, PARAMS_FILE, \
|
||||
JOB_STATUS, JOB_HASH, SHA256, STATUS_SKIPPED, JOB_END_TIME, \
|
||||
JOB_ERROR, STATUS_FAILED, PYTHON_EXECUTION_BASE, get_job_file, \
|
||||
JOB_ERROR, STATUS_FAILED, get_job_file, \
|
||||
get_result_file
|
||||
|
||||
|
||||
# Identify job files
|
||||
job_dir = os.path.join(job[PYTHON_EXECUTION_BASE], job[JOB_ID])
|
||||
meta_file = os.path.join(job_dir, META_FILE)
|
||||
# TODO fix these paths so they are dynamic
|
||||
base_file = os.path.join(job_dir, get_base_file(JOB_TYPE_PAPERMILL))
|
||||
@ -209,6 +197,8 @@ def papermill_job_func(job):
|
||||
result_file = os.path.join(job_dir, get_result_file(JOB_TYPE_PAPERMILL))
|
||||
param_file = os.path.join(job_dir, PARAMS_FILE)
|
||||
|
||||
# Get job defintions
|
||||
job = read_yaml(meta_file)
|
||||
yaml_dict = read_yaml(param_file)
|
||||
|
||||
# Check the hash of the triggering file, if present. This addresses
|
||||
|
Reference in New Issue
Block a user