Split the functionality file across a new module, as it was getting too long on its own. All functions remain the same, but imports will need to be updated.

This commit is contained in:
PatchOfScotland
2023-02-10 14:46:46 +01:00
parent dfd83e28dc
commit 7f8b4f3e32
23 changed files with 540 additions and 464 deletions

View File

@ -1,3 +0,0 @@
from core.correctness.validation import *
from core.correctness.vars import *

View File

@ -8,7 +8,7 @@ Author(s): David Marchant
from datetime import datetime
from inspect import signature
from os.path import sep, exists, isfile, isdir, dirname
from typing import Any, _SpecialForm, Union, Tuple, Type, Dict, List, \
from typing import Any, _SpecialForm, Union, Type, Dict, List, \
get_origin, get_args
from core.correctness.vars import VALID_PATH_CHARS, get_not_imp_msg, \
@ -248,22 +248,6 @@ def valid_non_existing_path(variable:str, allow_base:bool=False):
raise ValueError(
f"Route to requested path '{variable}' does not exist.")
def setup_debugging(print:Any=None, logging:int=0)->Tuple[Any,int]:
    """
    Validate a debug output target together with its logging level.

    :param print: (any) [optional] Object that debug messages are sent to. It
    must expose a callable 'write' attribute. Default is None.

    :param logging: (int) [optional] The logging level. Default is 0.

    :return: (Tuple[Any,int]) The pair (None, 0) if no print target was
    given, otherwise the given target and logging level, unchanged.
    """
    check_type(logging, int)

    # Without a target there is nowhere to log to, so the level is reset.
    if print is None:
        return None, 0

    if not isinstance(print, object):
        raise TypeError(f"Invalid print location provided")

    # The target only needs to behave like a stream with a write method.
    write_attr = getattr(print, "write", None)
    if not (write_attr and callable(write_attr)):
        raise TypeError(f"Print object does not implement required "
            "'write' function")

    return print, logging
def valid_meow_dict(meow_dict:Dict[str,Any], msg:str,
keys:Dict[str,Type])->None:
"""Check given dictionary expresses a meow construct. This won't do much

View File

@ -1,402 +0,0 @@
# TODO comments
import copy
import hashlib
import json
import nbformat
import os
import yaml
from datetime import datetime
from typing import List
from multiprocessing.connection import Connection, wait as multi_wait
# Need to import additional Connection type for Windows machines
if os.name == 'nt':
from multiprocessing.connection import PipeConnection
from multiprocessing.queues import Queue
from papermill.translators import papermill_translators
from typing import Any, Dict
from random import SystemRandom
from core.correctness.validation import check_type, valid_existing_file_path, \
valid_path, check_script
from core.correctness.vars import CHAR_LOWERCASE, CHAR_UPPERCASE, \
VALID_CHANNELS, HASH_BUFFER_SIZE, SHA256, DEBUG_WARNING, DEBUG_INFO, \
EVENT_TYPE, EVENT_PATH, JOB_EVENT, JOB_TYPE, JOB_ID, JOB_PATTERN, \
JOB_RECIPE, JOB_RULE, EVENT_RULE, JOB_STATUS, STATUS_QUEUED, \
JOB_CREATE_TIME, JOB_REQUIREMENTS, WATCHDOG_BASE, WATCHDOG_HASH, \
EVENT_TYPE_WATCHDOG
# mig trigger keyword replacements
# Placeholder strings that replace_keywords() substitutes in string values.
# The trailing comment on each states what the placeholder is replaced with.
KEYWORD_PATH = "{PATH}"             # the triggering path (src_path)
KEYWORD_REL_PATH = "{REL_PATH}"     # src_path relative to monitor_base
KEYWORD_DIR = "{DIR}"               # directory name of src_path
KEYWORD_REL_DIR = "{REL_DIR}"       # directory name of the relative path
KEYWORD_FILENAME = "{FILENAME}"     # base name of src_path
KEYWORD_PREFIX = "{PREFIX}"         # file name without its extension
KEYWORD_BASE = "{VGRID}"            # the monitor_base value
KEYWORD_EXTENSION = "{EXTENSION}"   # extension as given by os.path.splitext
KEYWORD_JOB = "{JOB}"               # the job_id value
#TODO Make this guaranteed unique
def generate_id(prefix:str="", length:int=16, existing_ids:List[str]=[],
        charset:str=CHAR_UPPERCASE+CHAR_LOWERCASE, attempts:int=24):
    """
    Generate a random identifier that is not already in use.

    :param prefix: (str) [optional] Text the identifier starts with. It
    counts towards the total length. Default is an empty string.

    :param length: (int) [optional] Target length of the identifier,
    including the prefix. Default is 16.

    :param existing_ids: (List[str]) [optional] Identifiers that must not be
    returned. Default is an empty list.

    :param charset: (str) [optional] Characters the random part is drawn
    from. Default is all upper and lower case characters.

    :param attempts: (int) [optional] How many candidates are tried before
    giving up. Default is 24.

    :return: (str) The first candidate not present in existing_ids. Raises a
    ValueError if every attempt collides.
    """
    # A prefix longer than the requested length leaves no random part.
    suffix_size = max(length - len(prefix), 0)
    rng = SystemRandom()

    for _ in range(attempts):
        suffix = ''.join([rng.choice(charset) for _ in range(suffix_size)])
        candidate = prefix + suffix
        if candidate not in existing_ids:
            return candidate

    raise ValueError(f"Could not generate ID unique from '{existing_ids}' "
        f"using values '{charset}' and length of '{length}'.")
def wait(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
    """
    Wait on the given channels using the implementation for this platform.

    :param inputs: (List[VALID_CHANNELS]) The channels to wait on.

    :return: (List[VALID_CHANNELS]) Whatever the platform specific wait
    function returns for the given inputs.
    """
    # 'nt' identifies Windows, which needs the PipeConnection aware variant.
    platform_wait = wait_windows if os.name == 'nt' else wait_linux
    return platform_wait(inputs)
def wait_windows(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
    """
    Wait on a mix of Connection, PipeConnection and Queue objects.

    :param inputs: (List[VALID_CHANNELS]) The channels to wait on. Entries
    of any other exact type are ignored.

    :return: (List[VALID_CHANNELS]) The entries of inputs reported as ready,
    in their original order.
    """
    # Queues cannot be waited on directly, so their reader end is used.
    connections = [c for c in inputs if type(c) is Connection]
    pipes = [c for c in inputs if type(c) is PipeConnection]
    readers = [q._reader for q in inputs if type(q) is Queue]
    ready = multi_wait(connections + pipes + readers)

    # Map the ready handles back onto the objects the caller passed in.
    ready_inputs = []
    for channel in inputs:
        kind = type(channel)
        if kind is Connection or kind is PipeConnection:
            handle = channel
        elif kind is Queue:
            handle = channel._reader
        else:
            continue
        if handle in ready:
            ready_inputs.append(channel)
    return ready_inputs
def wait_linux(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
    """
    Wait on a mix of Connection and Queue objects.

    :param inputs: (List[VALID_CHANNELS]) The channels to wait on. Entries
    of any other exact type are ignored.

    :return: (List[VALID_CHANNELS]) The entries of inputs reported as ready,
    in their original order.
    """
    # Queues cannot be waited on directly, so their reader end is used.
    connections = [c for c in inputs if type(c) is Connection]
    readers = [q._reader for q in inputs if type(q) is Queue]
    ready = multi_wait(connections + readers)

    # Map the ready handles back onto the objects the caller passed in.
    ready_inputs = []
    for channel in inputs:
        kind = type(channel)
        if kind is Connection:
            handle = channel
        elif kind is Queue:
            handle = channel._reader
        else:
            continue
        if handle in ready:
            ready_inputs.append(channel)
    return ready_inputs
def _get_file_sha256(file_path):
    """
    Calculate the SHA256 digest of a file.

    :param file_path: The file to hash. It is opened in binary mode.

    :return: (str) The hexadecimal digest of the file contents.
    """
    digest = hashlib.sha256()
    with open(file_path, 'rb') as stream:
        # Read in HASH_BUFFER_SIZE pieces until an empty read marks the end,
        # so the whole file is never held in memory at once.
        for chunk in iter(lambda: stream.read(HASH_BUFFER_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()
def get_file_hash(file_path:str, hash:str, hint:str=""):
    """
    Calculate the hash of a file using a named hashing algorithm.

    :param file_path: (str) The file to hash. It is checked with
    valid_existing_file_path before hashing.

    :param hash: (str) The name of the hashing algorithm to use. Only SHA256
    is currently supported.

    :param hint: (str) [optional] Passed on to check_type when checking the
    hash argument. Default is an empty string.

    :return: The result of the selected hashing function. Raises a KeyError
    if the requested hash is not supported.
    """
    check_type(hash, str, hint=hint)

    valid_existing_file_path(file_path)

    # Dispatch table of supported hash names to their implementations.
    valid_hashes = {
        SHA256: _get_file_sha256
    }
    if hash not in valid_hashes:
        # Closing quote added so the listed options are properly delimited.
        raise KeyError(f"Cannot use hash '{hash}'. Valid are "
            f"'{list(valid_hashes.keys())}'")

    return valid_hashes[hash](file_path)
def rmtree(directory:str):
    """
    Remove a directory and all its contents.
    Should be faster than shutil.rmtree

    Symbolic links are removed as links. The directory or file a link points
    to is never entered or modified.

    :param: (str) The directory to empty and remove

    :return: No return
    """
    if not os.path.exists(directory):
        return

    # A link to a directory must only be unlinked. Walking it would delete
    # the contents of its target, and os.rmdir cannot remove a link.
    if os.path.islink(directory):
        os.remove(directory)
        return

    # Walking bottom-up means every real sub-directory has already been
    # emptied by the time its parent lists it.
    for root, dirs, files in os.walk(directory, topdown=False):
        for file in files:
            os.remove(os.path.join(root, file))
        for dir in dirs:
            dir_path = os.path.join(root, dir)
            # os.walk lists links to directories in dirs without descending
            # into them, so they are still present and must be unlinked.
            if os.path.islink(dir_path):
                os.remove(dir_path)
            else:
                os.rmdir(dir_path)
    os.rmdir(directory)
def make_dir(path:str, can_exist:bool=True, ensure_clean:bool=False):
    """
    Create a directory, along with any missing parents, at the given path.

    :param path: (str) Where the directory should be created.

    :param can_exist: (boolean) [optional] Whether an already existing
    directory at the path is acceptable. If False, an existing directory
    causes os.makedirs to raise an error. Default is True.

    :param ensure_clean: (boolean) [optional] Whether an already existing
    directory at the path is removed first, so that a new empty one is
    created. Default is False.

    :return: No return
    """
    already_there = os.path.exists(path)

    # A file occupying the path can never be turned into a directory.
    if already_there and os.path.isfile(path):
        raise ValueError(
            f"Cannot make directory in {path} as it already exists and is "
            "a file")

    if already_there and ensure_clean:
        rmtree(path)

    os.makedirs(path, exist_ok=can_exist)
def read_file(filepath:str):
    """
    Reads the whole of a text file.

    :param filepath: (str) The file to read.

    :return: (str) The complete contents of the file.
    """
    with open(filepath, 'r') as handle:
        contents = handle.read()
    return contents
def read_file_lines(filepath:str):
    """
    Reads a text file as a list of lines.

    :param filepath: (str) The file to read.

    :return: (List[str]) The lines of the file, as given by readlines, so
    line endings are kept.
    """
    with open(filepath, 'r') as handle:
        lines = handle.readlines()
    return lines
def write_file(source:str, filename:str):
    """
    Writes a string to a text file, replacing any previous contents.

    :param source: (str) The text to write.

    :param filename: (str) The file to write to.

    :return: No return
    """
    with open(filename, 'w') as handle:
        handle.write(source)
def read_yaml(filepath:str):
    """
    Loads the contents of a yaml file.

    :param filepath: (str) The yaml file to read.

    :return: (object) Whatever object the yaml document describes.
    """
    # NOTE(review): yaml.Loader can construct arbitrary Python objects, so
    # this should only be used on trusted files. Confirm against callers.
    with open(filepath, 'r') as stream:
        loaded = yaml.load(stream, Loader=yaml.Loader)
    return loaded
def write_yaml(source:Any, filename:str):
    """
    Dumps a python object into a yaml file, replacing any previous contents.

    :param source: (any) The object to be written.

    :param filename: (str) The file to write to.

    :return: No return
    """
    with open(filename, 'w') as stream:
        # Block style output, rather than inline flow style collections.
        yaml.dump(source, stream, default_flow_style=False)
def read_notebook(filepath:str):
    """
    Reads a jupyter notebook file as a json object.

    :param filepath: (str) The notebook to read. It is checked with
    valid_path, using the extension 'ipynb'.

    :return: The object parsed from the json contents of the file.
    """
    valid_path(filepath, extension="ipynb")
    with open(filepath, 'r') as notebook_file:
        notebook = json.load(notebook_file)
    return notebook
def write_notebook(source:Dict[str,Any], filename:str):
    """
    Saves notebook source code as json in the given file, replacing any
    previous contents.

    :param source: (dict) The notebook source dictionary.

    :param filename: (str) The file to write to.

    :return: No return
    """
    with open(filename, 'w') as notebook_file:
        json.dump(source, notebook_file)
# Adapted from: https://github.com/rasmunk/notebook_parameterizer
def parameterize_jupyter_notebook(jupyter_notebook:Dict[str,Any],
        parameters:Dict[str,Any], expand_env_values:bool=False)->Dict[str,Any]:
    """
    Create a copy of a notebook in which assignments to named parameters are
    replaced with new values. The given notebook is not modified.

    Only lines in code cells of the form 'name = value', containing exactly
    one '=' and where name is a key of parameters, are rewritten.

    :param jupyter_notebook: (dict) The notebook to parameterize. It must
    pass nbformat validation and be nbformat version 4.

    :param parameters: (dict) Maps variable names to their new values.

    :param expand_env_values: (boolean) [optional] If True, any str value
    starting with 'ENV_' is replaced with the value of the environment
    variable named by the rest of the string. Default is False.

    :return: (dict) The parameterized copy of the notebook. Raises a Warning
    if the notebook is not nbformat 4, and an AttributeError if the metadata
    contains neither 'language_info' nor 'kernelspec'.
    """
    nbformat.validate(jupyter_notebook)
    check_type(parameters, Dict,
        hint="parameterize_jupyter_notebook.parameters")

    if jupyter_notebook["nbformat"] != 4:
        raise Warning(
            "Parameterization designed to work with nbformat version 4. "
            f"Differing version of '{jupyter_notebook['nbformat']}' may "
            "produce unexpeted results.")

    # Identify the notebook language. 'language_info' takes precedence, as
    # it always has done, with 'kernelspec' as a fallback. Previously a
    # notebook with only 'kernelspec' was wrongly rejected.
    metadata = jupyter_notebook["metadata"]
    if "language_info" in metadata:
        kernel_name = metadata["language_info"]["name"]
        language = metadata["language_info"]["name"]
    elif "kernelspec" in metadata:
        kernel_name = metadata["kernelspec"]["name"]
        language = metadata["kernelspec"]["language"]
    else:
        raise AttributeError(
            f"Notebook lacks key language and/or kernel_name attributes "
            "within metadata")

    translator = papermill_translators.find_translator(kernel_name, language)

    output_notebook = copy.deepcopy(jupyter_notebook)

    # Only code cells can contain parameter assignments
    cells = output_notebook["cells"]
    code_cells = [
        (idx, cell) for idx, cell in enumerate(cells) \
        if cell["cell_type"] == "code"
    ]
    env_prefix = "ENV_"
    for idx, cell in code_cells:
        cell_updated = False
        source = cell["source"]
        # Either single string or a list of strings
        if isinstance(source, str):
            lines = source.split("\n")
        else:
            lines = source

        for idy, line in enumerate(lines):
            if "=" not in line:
                continue
            d_line = [part.replace(" ", "") for part in line.split("=")]
            # Matching parameter name
            if len(d_line) != 2 or d_line[0] not in parameters:
                continue
            value = parameters[d_line[0]]
            # Whether to expand value from os env
            if (
                expand_env_values
                and isinstance(value, str)
                and value.startswith(env_prefix)
            ):
                # Strip only the leading marker, so that names which
                # contain 'ENV_' again later on are left intact.
                env_var = value[len(env_prefix):]
                value = os.getenv(
                    env_var,
                    "MISSING ENVIRONMENT VARIABLE: {}".format(env_var)
                )
            lines[idy] = translator.assign(
                d_line[0], translator.translate(value)
            )
            cell_updated = True

        if cell_updated:
            # NOTE(review): if source was a list whose entries already end
            # in newlines, joining adds extra blank lines - confirm this is
            # acceptable.
            cells[idx]["source"] = "\n".join(lines)

    # Validate that the parameterized notebook is still valid
    nbformat.validate(output_notebook, version=4)

    return output_notebook
def parameterize_python_script(script:List[str], parameters:Dict[str,Any],
        expand_env_values:bool=False)->List[str]:
    """
    Create a copy of a script in which assignments to named parameters are
    replaced with new values. The given script is not modified.

    Only lines of the form 'name = value', containing exactly one '=' and
    where name is a key of parameters, are rewritten. They are replaced with
    an assignment of the repr of the new value.

    :param script: (List[str]) The lines of the script. Checked with
    check_script both before and after parameterization.

    :param parameters: (dict) Maps variable names to their new values.

    :param expand_env_values: (boolean) [optional] If True, any str value
    starting with 'ENV_' is replaced with the value of the environment
    variable named by the rest of the string. Default is False.

    :return: (List[str]) The lines of the parameterized script.
    """
    check_script(script)
    check_type(parameters, Dict
        ,hint="parameterize_python_script.parameters")

    output_script = copy.deepcopy(script)

    env_prefix = "ENV_"
    for i, line in enumerate(output_script):
        if "=" not in line:
            continue
        d_line = [part.replace(" ", "") for part in line.split("=")]
        # Matching parameter name
        if len(d_line) != 2 or d_line[0] not in parameters:
            continue
        value = parameters[d_line[0]]
        # Whether to expand value from os env
        if (
            expand_env_values
            and isinstance(value, str)
            and value.startswith(env_prefix)
        ):
            # Strip only the leading marker, so that names which contain
            # 'ENV_' again later on are left intact.
            env_var = value[len(env_prefix):]
            value = os.getenv(
                env_var,
                "MISSING ENVIRONMENT VARIABLE: {}".format(env_var)
            )
        output_script[i] = f"{d_line[0]} = {repr(value)}"

    # Validate that the parameterized script is still valid
    check_script(output_script)

    return output_script
def print_debug(print_target, debug_level, msg, level)->None:
    """Function to print a message to the debug target, provided a target
    was given and the level of the message does not exceed the debug level.
    Messages are labelled INFO, WARNING or ERROR according to their level."""
    if print_target is None or not level <= debug_level:
        return

    # Any level that is not info or warning is reported as an error.
    status = "INFO" if level == DEBUG_INFO \
        else "WARNING" if level == DEBUG_WARNING \
        else "ERROR"
    print(f"{status}: {msg}", file=print_target)
def replace_keywords(old_dict:Dict[str,str], job_id:str, src_path:str,
        monitor_base:str)->Dict[str,str]:
    """Function to build a copy of a dictionary in which every MEOW magic
    word found in a str value is swapped for its dynamic value. Values that
    are not strings are copied across unchanged."""
    filename = os.path.basename(src_path)
    dirname = os.path.dirname(src_path)
    relpath = os.path.relpath(src_path, monitor_base)
    reldirname = os.path.dirname(relpath)
    (prefix, extension) = os.path.splitext(filename)

    # Applied strictly in this order, as earlier substitutions may insert
    # text that later ones then act upon.
    substitutions = [
        (KEYWORD_PATH, src_path),
        (KEYWORD_REL_PATH, relpath),
        (KEYWORD_DIR, dirname),
        (KEYWORD_REL_DIR, reldirname),
        (KEYWORD_FILENAME, filename),
        (KEYWORD_PREFIX, prefix),
        (KEYWORD_BASE, monitor_base),
        (KEYWORD_EXTENSION, extension),
        (KEYWORD_JOB, job_id),
    ]

    new_dict = {}
    for var, val in old_dict.items():
        if isinstance(val, str):
            for keyword, replacement in substitutions:
                val = val.replace(keyword, replacement)
        new_dict[var] = val

    return new_dict
def create_event(event_type:str, path:str, rule:Any, extras:Dict[Any,Any]={}
        )->Dict[Any,Any]:
    """Function to create a MEOW event dictionary. The path, type and rule
    entries always take priority over matching keys within extras."""
    event = {**extras}
    event.update({
        EVENT_PATH: path,
        EVENT_TYPE: event_type,
        EVENT_RULE: rule
    })
    return event
def create_watchdog_event(path:str, rule:Any, base:str, hash:str,
        extras:Dict[Any,Any]={})->Dict[Any,Any]:
    """Function to create a MEOW event dictionary of the watchdog type. The
    given hash and base are added to extras, taking priority over any
    matching keys already present."""
    watchdog_extras = {
        **extras,
        WATCHDOG_HASH: hash,
        WATCHDOG_BASE: base
    }
    return create_event(
        EVENT_TYPE_WATCHDOG,
        path,
        rule,
        extras=watchdog_extras
    )
def create_job(job_type:str, event:Dict[str,Any], extras:Dict[Any,Any]={}
        )->Dict[Any,Any]:
    """Function to create a MEOW job dictionary from a triggering event. The
    job is given a new id, a queued status and a creation time of now.
    Generated entries take priority over matching keys within extras."""
    job_id = generate_id(prefix="job_")
    rule = event[EVENT_RULE]

    job_dict = {
        #TODO compress event?
        JOB_ID: job_id,
        JOB_EVENT: event,
        JOB_TYPE: job_type,
        JOB_PATTERN: rule.pattern.name,
        JOB_RECIPE: rule.recipe.name,
        JOB_RULE: rule.name,
        JOB_STATUS: STATUS_QUEUED,
        JOB_CREATE_TIME: datetime.now(),
        JOB_REQUIREMENTS: rule.recipe.requirements
    }

    merged = {**extras}
    merged.update(job_dict)
    return merged
def lines_to_string(lines:List[str])->str:
    """Function to merge a list of str lines into a single string, with a
    newline character placed between each pair of lines."""
    separator = "\n"
    return separator.join(lines)

View File

@ -19,7 +19,7 @@ from core.correctness.vars import VALID_RECIPE_NAME_CHARS, \
SWEEP_JUMP, SWEEP_START, SWEEP_STOP, get_drt_imp_msg
from core.correctness.validation import valid_string, check_type, \
check_implementation, valid_list, valid_dict
from core.functionality import generate_id
from functionality.naming import generate_id
class BaseRecipe:

View File

@ -15,12 +15,12 @@ from random import randrange
from typing import Any, Union, Dict, List
from core.correctness.vars import DEBUG_WARNING, DEBUG_INFO, EVENT_TYPE, \
VALID_CHANNELS, JOB_ID, META_FILE, DEFAULT_JOB_OUTPUT_DIR, \
DEFAULT_JOB_QUEUE_DIR
from core.correctness.validation import setup_debugging, check_type, \
valid_list, valid_dir_path
from core.functionality import print_debug, wait, read_yaml, make_dir
VALID_CHANNELS, META_FILE, DEFAULT_JOB_OUTPUT_DIR, DEFAULT_JOB_QUEUE_DIR
from core.correctness.validation import check_type, valid_list, valid_dir_path
from core.meow import BaseHandler, BaseMonitor, BaseConductor
from functionality.debug import setup_debugging, print_debug
from functionality.file_io import make_dir, read_yaml
from functionality.process_io import wait
class MeowRunner: