added rudimentary conductor for job execution
@@ -1,68 +1,22 @@
import copy
import nbformat
import os
import papermill
import shutil
import sys
import threading

from datetime import datetime
from multiprocessing import Pipe
from time import sleep
from typing import Any
from watchdog.events import FileSystemEvent

from core.correctness.validation import check_type, valid_string, \
    valid_dict, valid_path, valid_list, valid_existing_dir_path, \
    setup_debugging
from core.correctness.vars import VALID_VARIABLE_NAME_CHARS, VALID_CHANNELS, \
    SHA256, DEBUG_ERROR, DEBUG_WARNING, DEBUG_INFO, WATCHDOG_TYPE, \
    WATCHDOG_BASE, WATCHDOG_RULE, EVENT_PATH
from core.functionality import wait, get_file_hash, generate_id, make_dir, \
    write_yaml, write_notebook, get_file_hash, parameterize_jupyter_notebook, \
    print_debug
from core.meow import BaseRecipe, BaseHandler, BaseRule
    PYTHON_FUNC, DEBUG_INFO, WATCHDOG_TYPE, JOB_HASH, PYTHON_EXECUTION_BASE, \
    WATCHDOG_RULE, EVENT_PATH, PYTHON_TYPE, WATCHDOG_HASH, JOB_PARAMETERS, \
    PYTHON_OUTPUT_DIR
from core.functionality import print_debug, create_job, replace_keywords
from core.meow import BaseRecipe, BaseHandler
from patterns.file_event_pattern import SWEEP_START, SWEEP_STOP, SWEEP_JUMP

# mig trigger keyword replacements
KEYWORD_PATH = "{PATH}"
KEYWORD_REL_PATH = "{REL_PATH}"
KEYWORD_DIR = "{DIR}"
KEYWORD_REL_DIR = "{REL_DIR}"
KEYWORD_FILENAME = "{FILENAME}"
KEYWORD_PREFIX = "{PREFIX}"
KEYWORD_BASE = "{VGRID}"
KEYWORD_EXTENSION = "{EXTENSION}"
KEYWORD_JOB = "{JOB}"

# job definitions
JOB_ID = 'id'
JOB_PATTERN = 'pattern'
JOB_RECIPE = 'recipe'
JOB_RULE = 'rule'
JOB_PATH = 'path'
JOB_HASH = 'hash'
JOB_STATUS = 'status'
JOB_CREATE_TIME = 'create'
JOB_START_TIME = 'start'
JOB_END_TIME = 'end'
JOB_ERROR = 'error'
JOB_REQUIREMENTS = 'requirements'

# job statuses
STATUS_QUEUED = 'queued'
STATUS_RUNNING = 'running'
STATUS_SKIPPED = 'skipped'
STATUS_FAILED = 'failed'
STATUS_DONE = 'done'

# job definition files
META_FILE = 'job.yml'
BASE_FILE = 'base.ipynb'
PARAMS_FILE = 'params.yml'
JOB_FILE = 'job.ipynb'
RESULT_FILE = 'result.ipynb'

class JupyterNotebookRecipe(BaseRecipe):
    source:str
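Note on the removals above: the KEYWORD_*, JOB_*, STATUS_* and *_FILE constants are not dropped but relocated, as the new imports from core.correctness.vars and core.functionality show. For orientation, a minimal sketch (an illustration, not part of the commit) of the substitution these keyword constants drive:

    # Illustration only: how "{PATH}"-style keywords behave once
    # replace_keywords (now imported from core.functionality) is applied.
    params = {"input_file": "{PATH}", "output_file": "{PREFIX}_done{EXTENSION}"}
    # For a triggering file 'base/data/run1.txt' these would resolve to:
    #   {"input_file": "base/data/run1.txt", "output_file": "run1_done.txt"}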
@@ -96,8 +50,6 @@ class PapermillHandler(BaseHandler):
    debug_level:int
    _worker:threading.Thread
    _stop_pipe:Pipe
    _jobs:list[str]
    _jobs_lock:threading.Lock
    _print_target:Any
    def __init__(self, handler_base:str, output_dir:str, print:Any=sys.stdout,
            logging:int=0)->None:
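The _stop_pipe attribute is annotated as Pipe, but multiprocessing.Pipe() actually returns a pair of already-connected Connection endpoints; a quick self-contained illustration:

    from multiprocessing import Pipe

    # Pipe() yields two Connection objects, not a single Pipe instance.
    receiver, sender = Pipe()
    sender.send("stop")
    print(receiver.recv())  # -> "stop"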
@@ -106,21 +58,16 @@ class PapermillHandler(BaseHandler):
        self.handler_base = handler_base
        self._is_valid_output_dir(output_dir)
        self.output_dir = output_dir
        self._print_target, self.debug_level = setup_debugging(print, logging)
        self._print_target, self.debug_level = setup_debugging(print, logging)
        self._worker = None
        self._stop_pipe = Pipe()
        self._jobs = []
        self._jobs_lock = threading.Lock()
        print_debug(self._print_target, self.debug_level,
            "Created new PapermillHandler instance", DEBUG_INFO)

    def handle(self, event:dict[Any,Any])->None:
        # TODO finish implementation and test

    def handle(self, event:dict[str,Any])->None:
        print_debug(self._print_target, self.debug_level,
            f"Handling event {event[EVENT_PATH]}", DEBUG_INFO)

        file_hash = get_file_hash(event[EVENT_PATH], SHA256)
        rule = event[WATCHDOG_RULE]

        yaml_dict = {}
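The file_hash captured here is re-checked before execution so that jobs whose triggering file has since changed can be skipped. Assuming get_file_hash(path, SHA256) is a plain content digest, it behaves like this sketch (the helper name sha256_of is illustrative, not the project's):

    import hashlib

    # Assumed equivalent of get_file_hash(path, SHA256): digest of file contents.
    def sha256_of(path:str)->str:
        with open(path, "rb") as f:
            return hashlib.sha256(f.read()).hexdigest()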
@@ -131,17 +78,7 @@ class PapermillHandler(BaseHandler):
        yaml_dict[rule.pattern.triggering_file] = event[EVENT_PATH]

        if not rule.pattern.sweep:
            waiting_for_threaded_resources = True
            while waiting_for_threaded_resources:
                try:
                    worker = threading.Thread(
                        target=self.execute_job,
                        args=[event, yaml_dict, file_hash])
                    worker.daemon = True
                    worker.start()
                    waiting_for_threaded_resources = False
                except threading.ThreadError:
                    sleep(1)
            self.setup_job(event, yaml_dict)
        else:
            for var, val in rule.pattern.sweep.items():
                values = []
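The context elided between this hunk and the next fills values from the sweep definition. Given the imported SWEEP_START, SWEEP_STOP and SWEEP_JUMP keys, the expansion presumably works like this sketch (an assumption; the elided code itself is not shown in this diff):

    # Presumed sweep expansion; the real implementation is elided here.
    sweep = {SWEEP_START: 0, SWEEP_STOP: 10, SWEEP_JUMP: 2}
    values = []
    value = sweep[SWEEP_START]
    while value <= sweep[SWEEP_STOP]:
        values.append(value)
        value += sweep[SWEEP_JUMP]
    # values == [0, 2, 4, 6, 8, 10]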
@@ -152,36 +89,7 @@ class PapermillHandler(BaseHandler):

                for value in values:
                    yaml_dict[var] = value
                    waiting_for_threaded_resources = True
                    while waiting_for_threaded_resources:
                        try:
                            worker = threading.Thread(
                                target=self.execute_job,
                                args=[event, yaml_dict, file_hash])
                            worker.daemon = True
                            worker.start()
                            waiting_for_threaded_resources = False
                        except threading.ThreadError:
                            sleep(1)

    def add_job(self, job):
        self._jobs_lock.acquire()
        try:
            self._jobs.append(job)
        except Exception as e:
            self._jobs_lock.release()
            raise e
        self._jobs_lock.release()

    def get_jobs(self):
        self._jobs_lock.acquire()
        try:
            jobs_deepcopy = copy.deepcopy(self._jobs)
        except Exception as e:
            self._jobs_lock.release()
            raise e
        self._jobs_lock.release()
        return jobs_deepcopy
                    self.setup_job(event, yaml_dict)

    def valid_event_types(self)->list[str]:
        return [WATCHDOG_TYPE]
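The removed add_job/get_jobs pair releases the lock by hand on both the success and failure paths; the same guarantee is expressed more compactly with context-managed locking. A sketch of an equivalent (not code from the repository):

    import copy
    import threading

    class JobList:
        def __init__(self):
            self._jobs = []
            self._jobs_lock = threading.Lock()

        def add_job(self, job):
            # 'with' releases the lock even if append raises.
            with self._jobs_lock:
                self._jobs.append(job)

        def get_jobs(self):
            with self._jobs_lock:
                return copy.deepcopy(self._jobs)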
@@ -195,135 +103,102 @@ class PapermillHandler(BaseHandler):
    def _is_valid_output_dir(self, output_dir)->None:
        valid_existing_dir_path(output_dir, allow_base=True)

    def execute_job(self, event:FileSystemEvent,
            yaml_dict:dict[str,Any], triggerfile_hash:str)->None:
    def setup_job(self, event:dict[str,Any], yaml_dict:dict[str,Any])->None:
        meow_job = create_job(PYTHON_TYPE, event, {
            JOB_PARAMETERS:yaml_dict,
            JOB_HASH: event[WATCHDOG_HASH],
            PYTHON_FUNC:job_func,
            PYTHON_OUTPUT_DIR:self.output_dir,
            PYTHON_EXECUTION_BASE:self.handler_base,})
        print_debug(self._print_target, self.debug_level,
            f"Creating job from event at {event[EVENT_PATH]} of type "
            f"{PYTHON_TYPE}.", DEBUG_INFO)
        self.to_runner.send(meow_job)

        job_dict = {
            JOB_ID: generate_id(prefix="job_", existing_ids=self.get_jobs()),
            JOB_PATTERN: event[WATCHDOG_RULE].pattern,
            JOB_RECIPE: event[WATCHDOG_RULE].recipe,
            JOB_RULE: event[WATCHDOG_RULE].name,
            JOB_PATH: event[EVENT_PATH],
            JOB_HASH: triggerfile_hash,
            JOB_STATUS: STATUS_QUEUED,
            JOB_CREATE_TIME: datetime.now(),
            JOB_REQUIREMENTS: event[WATCHDOG_RULE].recipe.requirements
        }
def job_func(job):
    import os
    import shutil
    import papermill
    from datetime import datetime
    from core.functionality import make_dir, write_yaml, \
        write_notebook, get_file_hash, parameterize_jupyter_notebook
    from core.correctness.vars import JOB_EVENT, WATCHDOG_RULE, \
        JOB_ID, EVENT_PATH, WATCHDOG_BASE, META_FILE, \
        BASE_FILE, PARAMS_FILE, JOB_FILE, RESULT_FILE, JOB_STATUS, \
        JOB_START_TIME, STATUS_RUNNING, JOB_HASH, SHA256, \
        STATUS_SKIPPED, STATUS_DONE, JOB_END_TIME, \
        JOB_ERROR, STATUS_FAILED, PYTHON_EXECUTION_BASE, PYTHON_OUTPUT_DIR

        print_debug(self._print_target, self.debug_level,
            f"Creating job for event at {event[EVENT_PATH]} with ID "
            f"{job_dict[JOB_ID]}", DEBUG_INFO)
    event = job[JOB_EVENT]

        self.add_job(job_dict[JOB_ID])
    yaml_dict = replace_keywords(
        job[JOB_PARAMETERS],
        job[JOB_ID],
        event[EVENT_PATH],
        event[WATCHDOG_BASE]
    )

        yaml_dict = self.replace_keywords(
            yaml_dict,
            job_dict[JOB_ID],
            event[EVENT_PATH],
            event[WATCHDOG_BASE]
    job_dir = os.path.join(job[PYTHON_EXECUTION_BASE], job[JOB_ID])
    make_dir(job_dir)

    meta_file = os.path.join(job_dir, META_FILE)
    write_yaml(job, meta_file)

    base_file = os.path.join(job_dir, BASE_FILE)
    write_notebook(event[WATCHDOG_RULE].recipe.recipe, base_file)

    param_file = os.path.join(job_dir, PARAMS_FILE)
    write_yaml(yaml_dict, param_file)

    job_file = os.path.join(job_dir, JOB_FILE)
    result_file = os.path.join(job_dir, RESULT_FILE)

    job[JOB_STATUS] = STATUS_RUNNING
    job[JOB_START_TIME] = datetime.now()

    write_yaml(job, meta_file)

    if JOB_HASH in job:
        triggerfile_hash = get_file_hash(job[JOB_EVENT][EVENT_PATH], SHA256)
        if not triggerfile_hash \
                or triggerfile_hash != job[JOB_HASH]:
            job[JOB_STATUS] = STATUS_SKIPPED
            job[JOB_END_TIME] = datetime.now()
            msg = "Job was skipped as triggering file " + \
                f"'{job[JOB_EVENT][EVENT_PATH]}' has been modified since " + \
                "scheduling. Was expected to have hash " + \
                f"'{job[JOB_HASH]}' but has '{triggerfile_hash}'."
            job[JOB_ERROR] = msg
            write_yaml(job, meta_file)
            return

    try:
        job_notebook = parameterize_jupyter_notebook(
            event[WATCHDOG_RULE].recipe.recipe, yaml_dict
        )

            job_dir = os.path.join(self.handler_base, job_dict[JOB_ID])
            make_dir(job_dir)

            meta_file = os.path.join(job_dir, META_FILE)
            write_yaml(job_dict, meta_file)

            base_file = os.path.join(job_dir, BASE_FILE)
            write_notebook(event[WATCHDOG_RULE].recipe.recipe, base_file)

            param_file = os.path.join(job_dir, PARAMS_FILE)
            write_yaml(yaml_dict, param_file)

            job_file = os.path.join(job_dir, JOB_FILE)
            result_file = os.path.join(job_dir, RESULT_FILE)

            job_dict[JOB_STATUS] = STATUS_RUNNING
            job_dict[JOB_START_TIME] = datetime.now()

            write_yaml(job_dict, meta_file)

        if JOB_HASH in job_dict:
            triggerfile_hash = get_file_hash(job_dict[JOB_PATH], SHA256)
            if not triggerfile_hash \
                    or triggerfile_hash != job_dict[JOB_HASH]:
                job_dict[JOB_STATUS] = STATUS_SKIPPED
                job_dict[JOB_END_TIME] = datetime.now()
                msg = "Job was skipped as triggering file " + \
                    f"'{job_dict[JOB_PATH]}' has been modified since " + \
                    "scheduling. Was expected to have hash " + \
                    f"'{job_dict[JOB_HASH]}' but has '{triggerfile_hash}'."
                job_dict[JOB_ERROR] = msg
                write_yaml(job_dict, meta_file)
                print_debug(self._print_target, self.debug_level,
                    msg, DEBUG_ERROR)
                return

        try:
            job_notebook = parameterize_jupyter_notebook(
                event[WATCHDOG_RULE].recipe.recipe, yaml_dict
            )
            write_notebook(job_notebook, job_file)
        except Exception:
            job_dict[JOB_STATUS] = STATUS_FAILED
            job_dict[JOB_END_TIME] = datetime.now()
            msg = f"Job file {job_dict[JOB_ID]} was not created successfully"
            job_dict[JOB_ERROR] = msg
            write_yaml(job_dict, meta_file)
            print_debug(self._print_target, self.debug_level,
                msg, DEBUG_ERROR)
            return

        try:
            papermill.execute_notebook(job_file, result_file, {})
        except Exception:
            job_dict[JOB_STATUS] = STATUS_FAILED
            job_dict[JOB_END_TIME] = datetime.now()
            msg = 'Result file %s was not created successfully'
            job_dict[JOB_ERROR] = msg
            write_yaml(job_dict, meta_file)
            print_debug(self._print_target, self.debug_level,
                msg, DEBUG_ERROR)
            return

        job_dict[JOB_STATUS] = STATUS_DONE
        job_dict[JOB_END_TIME] = datetime.now()
        write_yaml(job_dict, meta_file)

        job_output_dir = os.path.join(self.output_dir, job_dict[JOB_ID])

        shutil.move(job_dir, job_output_dir)

        print_debug(self._print_target, self.debug_level,
            f"Completed job {job_dict[JOB_ID]} with output at "
            f"{job_output_dir}", DEBUG_INFO)

        write_notebook(job_notebook, job_file)
    except Exception as e:
        job[JOB_STATUS] = STATUS_FAILED
        job[JOB_END_TIME] = datetime.now()
        msg = f"Job file {job[JOB_ID]} was not created successfully. {e}"
        job[JOB_ERROR] = msg
        write_yaml(job, meta_file)
        return

    def replace_keywords(self, old_dict:dict[str,str], job_id:str,
            src_path:str, monitor_base:str)->dict[str,str]:
        new_dict = {}
    try:
        papermill.execute_notebook(job_file, result_file, {})
    except Exception as e:
        job[JOB_STATUS] = STATUS_FAILED
        job[JOB_END_TIME] = datetime.now()
        msg = f"Result file {result_file} was not created successfully. {e}"
        job[JOB_ERROR] = msg
        write_yaml(job, meta_file)
        return

        filename = os.path.basename(src_path)
        dirname = os.path.dirname(src_path)
        relpath = os.path.relpath(src_path, monitor_base)
        reldirname = os.path.dirname(relpath)
        (prefix, extension) = os.path.splitext(filename)
    job[JOB_STATUS] = STATUS_DONE
    job[JOB_END_TIME] = datetime.now()
    write_yaml(job, meta_file)

        for var, val in old_dict.items():
            if isinstance(val, str):
                val = val.replace(KEYWORD_PATH, src_path)
                val = val.replace(KEYWORD_REL_PATH, relpath)
                val = val.replace(KEYWORD_DIR, dirname)
                val = val.replace(KEYWORD_REL_DIR, reldirname)
                val = val.replace(KEYWORD_FILENAME, filename)
                val = val.replace(KEYWORD_PREFIX, prefix)
                val = val.replace(KEYWORD_BASE, monitor_base)
                val = val.replace(KEYWORD_EXTENSION, extension)
                val = val.replace(KEYWORD_JOB, job_id)
    job_output_dir = os.path.join(job[PYTHON_OUTPUT_DIR], job[JOB_ID])

                new_dict[var] = val
            else:
                new_dict[var] = val

        return new_dict
    shutil.move(job_dir, job_output_dir)
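Taken together, the new flow is: handle() builds yaml_dict, setup_job() packages it with create_job and sends it down self.to_runner, and the rudimentary conductor named in the commit title receives that job and calls its PYTHON_FUNC (job_func above). A hedged sketch of that receiving loop; the connection and function names here are assumptions, since the conductor itself is not part of this file:

    from multiprocessing import Pipe

    from core.correctness.vars import PYTHON_FUNC

    to_runner, from_handler = Pipe()

    def conduct(connection)->None:
        # Receive queued meow jobs and run each one's Python function;
        # job_func records its own status transitions in the job's meta file.
        while connection.poll(5):
            job = connection.recv()
            job[PYTHON_FUNC](job)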