Split the functionality file across a new module, as it was getting too long on its own. All functions remain the same, but imports will need to be updated.
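For example, downstream code would now pull each helper from its new submodule (module and function names as introduced by this commit; the previous import paths are not shown here):

from functionality.debug import setup_debugging, print_debug
from functionality.file_io import make_dir, read_yaml, write_yaml
from functionality.hashing import get_file_hash
from functionality.meow import create_watchdog_event, create_job
from functionality.naming import generate_id
from functionality.parameterisation import parameterize_python_script
from functionality.process_io import wait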
functionality/debug.py (new file, 42 lines added)
@@ -0,0 +1,42 @@
"""
|
||||
This file contains functions for debugging and logging.
|
||||
|
||||
Author(s): David Marchant
|
||||
"""
|
||||
|
||||
from typing import Any, Tuple
|
||||
|
||||
from core.correctness.validation import check_type
|
||||
from core.correctness.vars import DEBUG_INFO, DEBUG_WARNING
|
||||
|
||||
|
||||
def setup_debugging(print:Any=None, logging:int=0)->Tuple[Any,int]:
|
||||
"""Create a place for debug messages to be sent. Always returns a place,
|
||||
along with a logging level."""
|
||||
check_type(logging, int)
|
||||
if print is None:
|
||||
return None, 0
|
||||
else:
|
||||
if not isinstance(print, object):
|
||||
raise TypeError(f"Invalid print location provided")
|
||||
writeable = getattr(print, "write", None)
|
||||
if not writeable or not callable(writeable):
|
||||
raise TypeError(f"Print object does not implement required "
|
||||
"'write' function")
|
||||
|
||||
return print, logging
|
||||
|
||||
|
||||
def print_debug(print_target, debug_level, msg, level)->None:
|
||||
"""Function to print a message to the debug target, if its level exceeds
|
||||
the given one."""
|
||||
if print_target is None:
|
||||
return
|
||||
else:
|
||||
if level <= debug_level:
|
||||
status = "ERROR"
|
||||
if level == DEBUG_INFO:
|
||||
status = "INFO"
|
||||
elif level == DEBUG_WARNING:
|
||||
status = "WARNING"
|
||||
print(f"{status}: {msg}", file=print_target)
|
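A minimal usage sketch of the two helpers above, assuming sys.stderr as the print target and that DEBUG_INFO and DEBUG_WARNING are the integer levels defined in core.correctness.vars:

import sys

from core.correctness.vars import DEBUG_INFO, DEBUG_WARNING
from functionality.debug import setup_debugging, print_debug

# Send debug output to stderr; setup_debugging checks the target has 'write'
target, level = setup_debugging(print=sys.stderr, logging=DEBUG_WARNING)

# Each message is printed only if its level is within the configured level
print_debug(target, level, "monitor started", DEBUG_INFO)
print_debug(target, level, "rule file missing", DEBUG_WARNING)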
functionality/file_io.py (new file, 118 lines added)
@@ -0,0 +1,118 @@
"""
|
||||
This file contains functions for reading and writing different types of files.
|
||||
|
||||
Author(s): David Marchant
|
||||
"""
|
||||
|
||||
import json
|
||||
import yaml
|
||||
|
||||
from os import makedirs, remove, rmdir, walk
|
||||
from os.path import exists, isfile, join
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from core.correctness.validation import valid_path
|
||||
|
||||
|
||||
def make_dir(path:str, can_exist:bool=True, ensure_clean:bool=False):
|
||||
"""
|
||||
Creates a new directory at the given path.
|
||||
|
||||
:param path: (str) The directory path.
|
||||
|
||||
:param can_exist: (boolean) [optional] A toggle for if a previously
|
||||
existing directory at the path will throw an error or not. Default is
|
||||
true (e.g. no error is thrown if the path already exists)
|
||||
|
||||
:param ensure_clean: (boolean) [optional] A toggle for if a previously
|
||||
existing directory at the path will be replaced with a new emtpy directory.
|
||||
Default is False.
|
||||
|
||||
:return: No return
|
||||
"""
|
||||
if exists(path):
|
||||
if isfile(path):
|
||||
raise ValueError(
|
||||
f"Cannot make directory in {path} as it already exists and is "
|
||||
"a file")
|
||||
if ensure_clean:
|
||||
rmtree(path)
|
||||
|
||||
makedirs(path, exist_ok=can_exist)
|
||||
|
||||
def rmtree(directory:str):
|
||||
"""
|
||||
Remove a directory and all its contents.
|
||||
Should be faster than shutil.rmtree
|
||||
|
||||
:param: (str) The firectory to empty and remove
|
||||
|
||||
:return: No return
|
||||
"""
|
||||
if not exists(directory):
|
||||
return
|
||||
for root, dirs, files in walk(directory, topdown=False):
|
||||
for file in files:
|
||||
remove(join(root, file))
|
||||
for dir in dirs:
|
||||
rmtree(join(root, dir))
|
||||
rmdir(directory)
|
||||
|
||||
def read_file(filepath:str):
|
||||
with open(filepath, 'r') as file:
|
||||
return file.read()
|
||||
|
||||
def read_file_lines(filepath:str):
|
||||
with open(filepath, 'r') as file:
|
||||
return file.readlines()
|
||||
|
||||
def write_file(source:str, filename:str):
|
||||
with open(filename, 'w') as file:
|
||||
file.write(source)
|
||||
|
||||
def read_yaml(filepath:str):
|
||||
"""
|
||||
Reads a file path as a yaml object.
|
||||
|
||||
:param filepath: (str) The file to read.
|
||||
|
||||
:return: (object) An object read from the file.
|
||||
"""
|
||||
with open(filepath, 'r') as yaml_file:
|
||||
return yaml.load(yaml_file, Loader=yaml.Loader)
|
||||
|
||||
def write_yaml(source:Any, filename:str):
|
||||
"""
|
||||
Writes a given objcet to a yaml file.
|
||||
|
||||
:param source: (any) A python object to be written.
|
||||
|
||||
:param filename: (str) The filename to be written to.
|
||||
|
||||
:return: No return
|
||||
"""
|
||||
with open(filename, 'w') as param_file:
|
||||
yaml.dump(source, param_file, default_flow_style=False)
|
||||
|
||||
def read_notebook(filepath:str):
|
||||
valid_path(filepath, extension="ipynb")
|
||||
with open(filepath, 'r') as read_file:
|
||||
return json.load(read_file)
|
||||
|
||||
def write_notebook(source:Dict[str,Any], filename:str):
|
||||
"""
|
||||
Writes the given notebook source code to a given filename.
|
||||
|
||||
:param source: (dict) The notebook source dictionary.
|
||||
|
||||
:param filename: (str) The filename to write to.
|
||||
|
||||
:return: No return
|
||||
"""
|
||||
with open(filename, 'w') as job_file:
|
||||
json.dump(source, job_file)
|
||||
|
||||
def lines_to_string(lines:List[str])->str:
|
||||
"""Function to convert a list of str lines, into one continuous string
|
||||
separated by newline characters"""
|
||||
return "\n".join(lines)
|
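As a short sketch, the helpers above can round-trip a YAML config through a freshly created directory (paths and values are illustrative):

from functionality.file_io import make_dir, write_yaml, read_yaml, rmtree

# Create (or recreate) a clean output directory
make_dir("output/job_1", can_exist=True, ensure_clean=True)

# Write a parameter dictionary out and read it back
write_yaml({"threshold": 0.5}, "output/job_1/params.yml")
params = read_yaml("output/job_1/params.yml")
assert params["threshold"] == 0.5

# Remove the directory tree again
rmtree("output")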
functionality/hashing.py (new file, 36 lines added)
@@ -0,0 +1,36 @@
"""
|
||||
This file contains functions for taking hashes of data and files.
|
||||
|
||||
Author(s): David Marchant
|
||||
"""
|
||||
|
||||
from hashlib import sha256
|
||||
|
||||
from core.correctness.vars import HASH_BUFFER_SIZE, SHA256
|
||||
from core.correctness.validation import check_type, valid_existing_file_path
|
||||
|
||||
def _get_file_sha256(file_path):
|
||||
sha256_hash = sha256()
|
||||
|
||||
with open(file_path, 'rb') as file_to_hash:
|
||||
while True:
|
||||
buffer = file_to_hash.read(HASH_BUFFER_SIZE)
|
||||
if not buffer:
|
||||
break
|
||||
sha256_hash.update(buffer)
|
||||
|
||||
return sha256_hash.hexdigest()
|
||||
|
||||
def get_file_hash(file_path:str, hash:str, hint:str=""):
|
||||
check_type(hash, str, hint=hint)
|
||||
|
||||
valid_existing_file_path(file_path)
|
||||
|
||||
valid_hashes = {
|
||||
SHA256: _get_file_sha256
|
||||
}
|
||||
if hash not in valid_hashes:
|
||||
raise KeyError(f"Cannot use hash '{hash}'. Valid are "
|
||||
f"'{list(valid_hashes.keys())}")
|
||||
|
||||
return valid_hashes[hash](file_path)
|
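Example use of get_file_hash, assuming SHA256 is the algorithm key exported by core.correctness.vars and that the target file already exists (the path is illustrative):

from core.correctness.vars import SHA256
from functionality.hashing import get_file_hash

# Raises if the path is not an existing file or the algorithm is unsupported
digest = get_file_hash("data/input.csv", SHA256, hint="example.file")
print(digest)  # 64-character hexadecimal SHA-256 digest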
functionality/meow.py (new file, 103 lines added)
@@ -0,0 +1,103 @@
"""
|
||||
This file contains functions for meow specific functionality.
|
||||
|
||||
Author(s): David Marchant
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from os.path import basename, dirname, relpath, splitext
|
||||
from typing import Any, Dict
|
||||
|
||||
from core.correctness.vars import EVENT_PATH, EVENT_RULE, EVENT_TYPE, \
|
||||
EVENT_TYPE_WATCHDOG, JOB_CREATE_TIME, JOB_EVENT, JOB_ID, JOB_PATTERN, \
|
||||
JOB_RECIPE, JOB_REQUIREMENTS, JOB_RULE, JOB_STATUS, JOB_TYPE, \
|
||||
STATUS_QUEUED, WATCHDOG_BASE, WATCHDOG_HASH
|
||||
from functionality.naming import generate_id
|
||||
|
||||
|
||||
# mig trigger keyword replacements
|
||||
KEYWORD_PATH = "{PATH}"
|
||||
KEYWORD_REL_PATH = "{REL_PATH}"
|
||||
KEYWORD_DIR = "{DIR}"
|
||||
KEYWORD_REL_DIR = "{REL_DIR}"
|
||||
KEYWORD_FILENAME = "{FILENAME}"
|
||||
KEYWORD_PREFIX = "{PREFIX}"
|
||||
KEYWORD_BASE = "{VGRID}"
|
||||
KEYWORD_EXTENSION = "{EXTENSION}"
|
||||
KEYWORD_JOB = "{JOB}"
|
||||
|
||||
|
||||
def replace_keywords(old_dict:Dict[str,str], job_id:str, src_path:str,
|
||||
monitor_base:str)->Dict[str,str]:
|
||||
"""Function to replace all MEOW magic words in a dictionary with dynamic
|
||||
values."""
|
||||
new_dict = {}
|
||||
|
||||
filename = basename(src_path)
|
||||
dir = dirname(src_path)
|
||||
relativepath = relpath(src_path, monitor_base)
|
||||
reldirname = dirname(relativepath)
|
||||
(prefix, extension) = splitext(filename)
|
||||
|
||||
for var, val in old_dict.items():
|
||||
if isinstance(val, str):
|
||||
val = val.replace(KEYWORD_PATH, src_path)
|
||||
val = val.replace(KEYWORD_REL_PATH, relativepath)
|
||||
val = val.replace(KEYWORD_DIR, dir)
|
||||
val = val.replace(KEYWORD_REL_DIR, reldirname)
|
||||
val = val.replace(KEYWORD_FILENAME, filename)
|
||||
val = val.replace(KEYWORD_PREFIX, prefix)
|
||||
val = val.replace(KEYWORD_BASE, monitor_base)
|
||||
val = val.replace(KEYWORD_EXTENSION, extension)
|
||||
val = val.replace(KEYWORD_JOB, job_id)
|
||||
|
||||
new_dict[var] = val
|
||||
else:
|
||||
new_dict[var] = val
|
||||
|
||||
return new_dict
|
||||
|
||||
def create_event(event_type:str, path:str, rule:Any, extras:Dict[Any,Any]={}
|
||||
)->Dict[Any,Any]:
|
||||
"""Function to create a MEOW dictionary."""
|
||||
return {
|
||||
**extras,
|
||||
EVENT_PATH: path,
|
||||
EVENT_TYPE: event_type,
|
||||
EVENT_RULE: rule
|
||||
}
|
||||
|
||||
def create_watchdog_event(path:str, rule:Any, base:str, hash:str,
|
||||
extras:Dict[Any,Any]={})->Dict[Any,Any]:
|
||||
"""Function to create a MEOW event dictionary."""
|
||||
return create_event(
|
||||
EVENT_TYPE_WATCHDOG,
|
||||
path,
|
||||
rule,
|
||||
extras={
|
||||
**extras,
|
||||
**{
|
||||
WATCHDOG_HASH: hash,
|
||||
WATCHDOG_BASE: base
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
def create_job(job_type:str, event:Dict[str,Any], extras:Dict[Any,Any]={}
|
||||
)->Dict[Any,Any]:
|
||||
"""Function to create a MEOW job dictionary."""
|
||||
job_dict = {
|
||||
#TODO compress event?
|
||||
JOB_ID: generate_id(prefix="job_"),
|
||||
JOB_EVENT: event,
|
||||
JOB_TYPE: job_type,
|
||||
JOB_PATTERN: event[EVENT_RULE].pattern.name,
|
||||
JOB_RECIPE: event[EVENT_RULE].recipe.name,
|
||||
JOB_RULE: event[EVENT_RULE].name,
|
||||
JOB_STATUS: STATUS_QUEUED,
|
||||
JOB_CREATE_TIME: datetime.now(),
|
||||
JOB_REQUIREMENTS: event[EVENT_RULE].recipe.requirements
|
||||
}
|
||||
|
||||
return {**extras, **job_dict}
|
||||
|
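A sketch of building a watchdog event and queuing a job from it; the rule object here is a hypothetical stand-in that only provides the attributes create_job reads, and the paths, hash, and job type are illustrative:

from types import SimpleNamespace

from core.correctness.vars import JOB_ID
from functionality.meow import create_watchdog_event, create_job, \
    replace_keywords

# Stand-in for a real rule object with .name, .pattern and .recipe
rule = SimpleNamespace(
    name="rule_1",
    pattern=SimpleNamespace(name="pattern_1"),
    recipe=SimpleNamespace(name="recipe_1", requirements={})
)

event = create_watchdog_event(
    "/monitored/data/sample.txt",  # triggering file path
    rule,
    "/monitored",                  # monitor base directory
    "abc123"                       # file hash
)
job = create_job("papermill", event)

# Expand MEOW keywords such as {PATH} and {JOB} inside a parameter dict
params = replace_keywords(
    {"infile": "{PATH}", "outdir": "job_{JOB}"},
    job[JOB_ID],
    "/monitored/data/sample.txt",
    "/monitored"
)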
functionality/naming.py (new file, 23 lines added)
@@ -0,0 +1,23 @@
"""
|
||||
This file contains functions for dynamic naming of objects.
|
||||
|
||||
Author(s): David Marchant
|
||||
"""
|
||||
|
||||
from typing import List
|
||||
from random import SystemRandom
|
||||
|
||||
from core.correctness.vars import CHAR_LOWERCASE, CHAR_UPPERCASE
|
||||
|
||||
|
||||
#TODO Make this guaranteed unique
|
||||
def generate_id(prefix:str="", length:int=16, existing_ids:List[str]=[],
|
||||
charset:str=CHAR_UPPERCASE+CHAR_LOWERCASE, attempts:int=24):
|
||||
random_length = max(length - len(prefix), 0)
|
||||
for _ in range(attempts):
|
||||
id = prefix + ''.join(SystemRandom().choice(charset)
|
||||
for _ in range(random_length))
|
||||
if id not in existing_ids:
|
||||
return id
|
||||
raise ValueError(f"Could not generate ID unique from '{existing_ids}' "
|
||||
f"using values '{charset}' and length of '{length}'.")
|
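For example, generating a job identifier that avoids a set of names already in use:

from functionality.naming import generate_id

used = ["job_aaaaaaaaaaaa"]
new_id = generate_id(prefix="job_", length=16, existing_ids=used)
# e.g. 'job_KqTbWzRmYcLp' - 16 characters in total, prefix included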
functionality/parameterisation.py (new file, 121 lines added)
@@ -0,0 +1,121 @@
"""
|
||||
This file contains functions for parameterising code in various formats.
|
||||
|
||||
Author(s): David Marchant
|
||||
"""
|
||||
|
||||
from copy import deepcopy
|
||||
from nbformat import validate
|
||||
from os import getenv
|
||||
from papermill.translators import papermill_translators
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from core.correctness.validation import check_script, check_type
|
||||
|
||||
# Adapted from: https://github.com/rasmunk/notebook_parameterizer
|
||||
def parameterize_jupyter_notebook(jupyter_notebook:Dict[str,Any],
|
||||
parameters:Dict[str,Any], expand_env_values:bool=False)->Dict[str,Any]:
|
||||
validate(jupyter_notebook)
|
||||
check_type(parameters, Dict,
|
||||
hint="parameterize_jupyter_notebook.parameters")
|
||||
|
||||
if jupyter_notebook["nbformat"] != 4:
|
||||
raise Warning(
|
||||
"Parameterization designed to work with nbformat version 4. "
|
||||
f"Differing version of '{jupyter_notebook['nbformat']}' may "
|
||||
"produce unexpeted results.")
|
||||
|
||||
# Load input notebook
|
||||
if "kernelspec" in jupyter_notebook["metadata"]:
|
||||
kernel_name = jupyter_notebook["metadata"]["kernelspec"]["name"]
|
||||
language = jupyter_notebook["metadata"]["kernelspec"]["language"]
|
||||
if "language_info" in jupyter_notebook["metadata"]:
|
||||
kernel_name = jupyter_notebook["metadata"]["language_info"]["name"]
|
||||
language = jupyter_notebook["metadata"]["language_info"]["name"]
|
||||
else:
|
||||
raise AttributeError(
|
||||
f"Notebook lacks key language and/or kernel_name attributes "
|
||||
"within metadata")
|
||||
|
||||
translator = papermill_translators.find_translator(kernel_name, language)
|
||||
|
||||
output_notebook = deepcopy(jupyter_notebook)
|
||||
|
||||
# Find each
|
||||
cells = output_notebook["cells"]
|
||||
code_cells = [
|
||||
(idx, cell) for idx, cell in enumerate(cells) \
|
||||
if cell["cell_type"] == "code"
|
||||
]
|
||||
for idx, cell in code_cells:
|
||||
cell_updated = False
|
||||
source = cell["source"]
|
||||
# Either single string or a list of strings
|
||||
if isinstance(source, str):
|
||||
lines = source.split("\n")
|
||||
else:
|
||||
lines = source
|
||||
|
||||
for idy, line in enumerate(lines):
|
||||
if "=" in line:
|
||||
d_line = list(map(lambda x: x.replace(" ", ""),
|
||||
line.split("=")))
|
||||
# Matching parameter name
|
||||
if len(d_line) == 2 and d_line[0] in parameters:
|
||||
value = parameters[d_line[0]]
|
||||
# Whether to expand value from os env
|
||||
if (
|
||||
expand_env_values
|
||||
and isinstance(value, str)
|
||||
and value.startswith("ENV_")
|
||||
):
|
||||
env_var = value.replace("ENV_", "")
|
||||
value = getenv(
|
||||
env_var,
|
||||
"MISSING ENVIRONMENT VARIABLE: {}".format(env_var)
|
||||
)
|
||||
lines[idy] = translator.assign(
|
||||
d_line[0], translator.translate(value)
|
||||
)
|
||||
|
||||
cell_updated = True
|
||||
if cell_updated:
|
||||
cells[idx]["source"] = "\n".join(lines)
|
||||
|
||||
# Validate that the parameterized notebook is still valid
|
||||
validate(output_notebook, version=4)
|
||||
|
||||
return output_notebook
|
||||
|
||||
def parameterize_python_script(script:List[str], parameters:Dict[str,Any],
|
||||
expand_env_values:bool=False)->Dict[str,Any]:
|
||||
check_script(script)
|
||||
check_type(parameters, Dict
|
||||
,hint="parameterize_python_script.parameters")
|
||||
|
||||
output_script = deepcopy(script)
|
||||
|
||||
for i, line in enumerate(output_script):
|
||||
if "=" in line:
|
||||
d_line = list(map(lambda x: x.replace(" ", ""),
|
||||
line.split("=")))
|
||||
# Matching parameter name
|
||||
if len(d_line) == 2 and d_line[0] in parameters:
|
||||
value = parameters[d_line[0]]
|
||||
# Whether to expand value from os env
|
||||
if (
|
||||
expand_env_values
|
||||
and isinstance(value, str)
|
||||
and value.startswith("ENV_")
|
||||
):
|
||||
env_var = value.replace("ENV_", "")
|
||||
value = getenv(
|
||||
env_var,
|
||||
"MISSING ENVIRONMENT VARIABLE: {}".format(env_var)
|
||||
)
|
||||
output_script[i] = f"{d_line[0]} = {repr(value)}"
|
||||
|
||||
# Validate that the parameterized notebook is still valid
|
||||
check_script(output_script)
|
||||
|
||||
return output_script
|
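A small sketch of script parameterisation with the function above; the input script is illustrative and check_script is assumed to accept a list of source lines:

from functionality.parameterisation import parameterize_python_script

script = [
    "num_iterations = 10",
    "outfile = 'default.txt'",
    "print(num_iterations, outfile)",
]
new_script = parameterize_python_script(
    script, {"num_iterations": 50, "outfile": "run_1.txt"}
)
# Assignments matching a parameter name are rewritten, e.g.
# 'num_iterations = 50' and "outfile = 'run_1.txt'"; other lines are untouched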
functionality/process_io.py (new file, 41 lines added)
@@ -0,0 +1,41 @@
"""
|
||||
This file contains functions for reading and writing between processes.
|
||||
|
||||
Author(s): David Marchant
|
||||
"""
|
||||
|
||||
from os import name as osName
|
||||
from typing import List
|
||||
|
||||
from multiprocessing.connection import Connection, wait as multi_wait
|
||||
# Need to import additional Connection type for Windows machines
|
||||
if osName == 'nt':
|
||||
from multiprocessing.connection import PipeConnection
|
||||
from multiprocessing.queues import Queue
|
||||
from core.correctness.vars import VALID_CHANNELS
|
||||
|
||||
|
||||
def wait(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
|
||||
if osName == 'nt':
|
||||
return wait_windows(inputs)
|
||||
return wait_linux(inputs)
|
||||
|
||||
def wait_windows(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
|
||||
all_connections = [i for i in inputs if type(i) is Connection] \
|
||||
+ [i for i in inputs if type(i) is PipeConnection] \
|
||||
+ [i._reader for i in inputs if type(i) is Queue]
|
||||
ready = multi_wait(all_connections)
|
||||
ready_inputs = [i for i in inputs if \
|
||||
(type(i) is Connection and i in ready) \
|
||||
or (type(i) is PipeConnection and i in ready) \
|
||||
or (type(i) is Queue and i._reader in ready)]
|
||||
return ready_inputs
|
||||
|
||||
def wait_linux(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
|
||||
all_connections = [i for i in inputs if type(i) is Connection] \
|
||||
+ [i._reader for i in inputs if type(i) is Queue]
|
||||
ready = multi_wait(all_connections)
|
||||
ready_inputs = [i for i in inputs if \
|
||||
(type(i) is Connection and i in ready) \
|
||||
or (type(i) is Queue and i._reader in ready)]
|
||||
return ready_inputs
|
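A minimal sketch of waiting on a mix of channels, assuming VALID_CHANNELS covers multiprocessing pipe connections and queues as the type checks above suggest:

from multiprocessing import Pipe, Queue

from functionality.process_io import wait

reader, writer = Pipe(duplex=False)
queue = Queue()

writer.send("hello")

# Blocks until at least one channel has data, then returns only those ready
ready = wait([reader, queue])
for channel in ready:
    msg = channel.recv() if hasattr(channel, "recv") else channel.get()
    print(msg)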