differentiated papermill and python jobs more clearly

This commit is contained in:
PatchOfScotland
2023-02-03 14:47:16 +01:00
parent 72d6b263b7
commit 47f9fe73fa
17 changed files with 853 additions and 119 deletions

View File

@ -72,6 +72,12 @@ def check_type(variable:Any, expected_type:type, alt_types:list[type]=[],
% (get_args(expected_type), type(variable))
)
def check_callable(call:Any)->None:
    """Confirms that the given object can be called. Raises a TypeError if
    it cannot, otherwise returns nothing."""
    if callable(call):
        return
    raise TypeError(f"Given object '{call}' is not a callable function")
def check_implementation(child_func, parent_class):
"""Checks if the given function has been overridden from the one inherited
from the parent class. Raises a NotImplementedError if this is the case."""
@ -94,6 +100,14 @@ def check_implementation(child_func, parent_class):
msg = get_not_imp_msg(parent_class, parent_func)
raise NotImplementedError(msg)
def check_script(script:Any):
    """Validates that the given variable is usable as a script, meaning a
    list in which every entry is a string. Any failure is reported by
    check_type."""
    check_type(script, list)
    # TODO investigate more robust check here
    for entry in script:
        check_type(entry, str)
def valid_string(variable:str, valid_chars:str, min_length:int=1)->None:
"""Checks that all characters in a given string are present in a provided
list of characters. Will raise an ValueError if unexpected character is

View File

@ -80,10 +80,24 @@ DIR_EVENTS = [
# meow jobs
JOB_TYPE = "job_type"
JOB_TYPE_PYTHON = "python"
JOB_TYPE_PAPERMILL = "papermill"
PYTHON_FUNC = "func"
# NOTE(review): the value below is spelled "exection_base"; presumably
# "execution_base" was intended. Confirm before changing, as the string is
# a runtime value and anything already relying on it would break.
PYTHON_EXECUTION_BASE = "exection_base"
PYTHON_OUTPUT_DIR = "output_dir"
# File names associated with each job type. The position within each list is
# significant: get_base_file, get_job_file and get_result_file read indexes
# 0, 1 and 2 respectively.
JOB_TYPES = {
JOB_TYPE_PAPERMILL: [
"base.ipynb",
"job.ipynb",
"result.ipynb",
],
JOB_TYPE_PYTHON: [
"base.py",
"job.py",
"result.py",
]
}
# job definitions
JOB_ID = "id"
JOB_EVENT = "event"
@ -108,10 +122,7 @@ STATUS_DONE = "done"
# job definition files
META_FILE = "job.yml"
BASE_FILE = "base.ipynb"
PARAMS_FILE = "params.yml"
JOB_FILE = "job.ipynb"
RESULT_FILE = "result.ipynb"
# Parameter sweep keys
SWEEP_START = "start"
@ -132,3 +143,12 @@ def get_not_imp_msg(parent_class, class_function):
return f"Children of the '{parent_class.__name__}' class must implement " \
f"the '{class_function.__name__}({signature(class_function)})' " \
"function"
def get_base_file(job_type:str):
    """Returns the base file name for the given job type, which is the first
    entry of that type's list in JOB_TYPES. A job type that is not a key of
    JOB_TYPES raises a KeyError."""
    type_files = JOB_TYPES[job_type]
    return type_files[0]
def get_job_file(job_type:str):
    """Returns the job file name for the given job type, which is the second
    entry of that type's list in JOB_TYPES. A job type that is not a key of
    JOB_TYPES raises a KeyError."""
    type_files = JOB_TYPES[job_type]
    return type_files[1]
def get_result_file(job_type:str):
    """Returns the result file name for the given job type, which is the
    third entry of that type's list in JOB_TYPES. A job type that is not a
    key of JOB_TYPES raises a KeyError."""
    type_files = JOB_TYPES[job_type]
    return type_files[2]

View File

@ -1,4 +1,4 @@
# TODO comments
import copy
import hashlib
import json
@ -15,7 +15,7 @@ from typing import Any
from random import SystemRandom
from core.correctness.validation import check_type, valid_existing_file_path, \
valid_path
valid_path, check_script
from core.correctness.vars import CHAR_LOWERCASE, CHAR_UPPERCASE, \
VALID_CHANNELS, HASH_BUFFER_SIZE, SHA256, DEBUG_WARNING, DEBUG_INFO, \
EVENT_TYPE, EVENT_PATH, JOB_EVENT, JOB_TYPE, JOB_ID, JOB_PATTERN, \
@ -128,6 +128,18 @@ def make_dir(path:str, can_exist:bool=True, ensure_clean:bool=False):
os.makedirs(path, exist_ok=can_exist)
def read_file(filepath:str):
    """Opens the file at the given path in text mode and returns its entire
    contents as a single string."""
    with open(filepath, 'r') as source_file:
        contents = source_file.read()
    return contents
def read_file_lines(filepath:str):
    """Opens the file at the given path in text mode and returns its
    contents as a list of lines. Each line keeps its line ending, as
    produced by readlines."""
    with open(filepath, 'r') as source_file:
        all_lines = source_file.readlines()
    return all_lines
def write_file(source:str, filename:str):
    """Writes the given string to the named file, opened in text write mode,
    so any existing content of that file is replaced."""
    with open(filename, 'w') as target_file:
        target_file.write(source)
def read_yaml(filepath:str):
"""
Reads a file path as a yaml object.
@ -171,7 +183,7 @@ def write_notebook(source:dict[str,Any], filename:str):
json.dump(source, job_file)
# Adapted from: https://github.com/rasmunk/notebook_parameterizer
def parameterize_jupyter_notebook( jupyter_notebook:dict[str,Any],
def parameterize_jupyter_notebook(jupyter_notebook:dict[str,Any],
parameters:dict[str,Any], expand_env_values:bool=False)->dict[str,Any]:
nbformat.validate(jupyter_notebook)
check_type(parameters, dict)
@ -244,6 +256,38 @@ def parameterize_jupyter_notebook( jupyter_notebook:dict[str,Any],
return output_notebook
def parameterize_python_script(script:list[str], parameters:dict[str,Any],
        expand_env_values:bool=False)->list[str]:
    """Returns a copy of the given script in which simple assignments to
    names present in 'parameters' are rewritten to assign the given values.

    A line is only rewritten if it contains exactly one '=' and the text to
    the left of it, with spaces removed, is a key in 'parameters'. The
    rewritten line has the form '<name> = <repr(value)>'.

    If 'expand_env_values' is set, any string value starting with 'ENV_' is
    replaced by the value of the environment variable named by the rest of
    that string, or by a 'MISSING ENVIRONMENT VARIABLE' message if no such
    variable is set.

    The input script is validated with check_script and is not modified; the
    returned list is validated the same way before being returned."""
    check_script(script)
    check_type(parameters, dict)

    env_prefix = "ENV_"
    output_script = copy.deepcopy(script)
    for i, line in enumerate(output_script):
        if "=" not in line:
            continue

        d_line = [part.replace(" ", "") for part in line.split("=")]
        # Only plain 'name = value' lines for a known parameter are rewritten
        if len(d_line) != 2 or d_line[0] not in parameters:
            continue

        value = parameters[d_line[0]]
        # Whether to expand value from os env
        if (
            expand_env_values
            and isinstance(value, str)
            and value.startswith(env_prefix)
        ):
            # Strip only the leading marker. Replacing every occurrence
            # would corrupt variable names that contain 'ENV_' themselves,
            # such as 'ENV_MY_ENV_VAR'.
            env_var = value.removeprefix(env_prefix)
            value = os.getenv(
                env_var,
                "MISSING ENVIRONMENT VARIABLE: {}".format(env_var)
            )
        output_script[i] = f"{d_line[0]} = {repr(value)}"

    # Validate that the parameterized script is still valid
    check_script(output_script)

    return output_script
def print_debug(print_target, debug_level, msg, level)->None:
if print_target is None:
return
@ -338,3 +382,6 @@ def create_job(job_type:str, event:dict[str,Any], extras:dict[Any,Any]={}
}
return {**extras, **job_dict}
def lines_to_string(lines:list[str])->str:
    """Joins the given lines into a single string, placing a newline
    character between consecutive lines. No trailing newline is added."""
    separator = "\n"
    return separator.join(lines)

View File

@ -98,7 +98,6 @@ class BasePattern:
check_implementation(type(self)._is_valid_recipe, BasePattern)
check_implementation(type(self)._is_valid_parameters, BasePattern)
check_implementation(type(self)._is_valid_output, BasePattern)
check_implementation(type(self)._is_valid_sweep, BasePattern)
self._is_valid_name(name)
self.name = name
self._is_valid_recipe(recipe)
@ -140,8 +139,9 @@ class BasePattern:
pass
def _is_valid_sweep(self, sweep:dict[str,Union[int,float,complex]])->None:
"""Validation check for 'sweep' variable from main constructor. Must
be implemented by any child class."""
"""Validation check for 'sweep' variable from main constructor. This
function is implemented to check for the types given in the signature,
and must be overridden if these differ."""
check_type(sweep, dict)
if not sweep:
return
@ -208,13 +208,19 @@ class BaseRule:
the input parameters."""
check_implementation(type(self)._is_valid_pattern, BaseRule)
check_implementation(type(self)._is_valid_recipe, BaseRule)
self.__check_types_set()
self._is_valid_name(name)
self.name = name
self._is_valid_pattern(pattern)
self.pattern = pattern
self._is_valid_recipe(recipe)
self.recipe = recipe
self.__check_types_set()
check_type(pattern, BasePattern)
check_type(recipe, BaseRecipe)
if pattern.recipe != recipe.name:
raise ValueError(f"Cannot create Rule {name}. Pattern "
f"{pattern.name} does not identify Recipe {recipe.name}. It "
f"uses {pattern.recipe}")
def __new__(cls, *args, **kwargs):
"""A check that this base class is not instantiated itself, only