Split the functionality file across new modules, as it was getting too long on its own. All functions remain the same, but imports will need to be updated.

This commit is contained in:
PatchOfScotland
2023-02-10 14:46:46 +01:00
parent dfd83e28dc
commit 7f8b4f3e32
23 changed files with 540 additions and 464 deletions

42
functionality/debug.py Normal file
View File

@ -0,0 +1,42 @@
"""
This file contains functions for debugging and logging.
Author(s): David Marchant
"""
from typing import Any, Tuple
from core.correctness.validation import check_type
from core.correctness.vars import DEBUG_INFO, DEBUG_WARNING
def setup_debugging(print:Any=None, logging:int=0)->Tuple[Any,int]:
    """
    Validate a debug output target and pair it with a logging level.

    :param print: (Any) [optional] The object debug messages will be sent to.
    It must expose a callable 'write' attribute. Default is None, which
    disables debug output.

    :param logging: (int) [optional] The requested logging level. Default is
    0.

    :return: (Tuple[Any,int]) The print target and the logging level. If no
    print target was given then (None, 0) is returned, regardless of the
    logging level that was requested.

    :raises TypeError: If the print target has no callable 'write' attribute.
    """
    check_type(logging, int)

    if print is None:
        return None, 0

    # Note that every Python object is an instance of 'object', so an
    # isinstance(print, object) guard can never fail. The only meaningful
    # requirement is the presence of a usable 'write' callable.
    writeable = getattr(print, "write", None)
    if not writeable or not callable(writeable):
        raise TypeError("Print object does not implement required "
            "'write' function")

    return print, logging
def print_debug(print_target, debug_level, msg, level)->None:
    """
    Send a labelled message to the debug target.

    :param print_target: The object to print to. If None, nothing is done.

    :param debug_level: The level the target is currently reporting at.

    :param msg: The message to report.

    :param level: The level of this message. The message is only printed if
    this is less than or equal to debug_level.

    :return: No return
    """
    # No target means debugging is switched off
    if print_target is None:
        return

    # Message is filtered out by the current debug level
    if not level <= debug_level:
        return

    # Anything that is not explicitly info or warning is reported as an error
    label = "INFO" if level == DEBUG_INFO \
        else "WARNING" if level == DEBUG_WARNING \
        else "ERROR"

    print(f"{label}: {msg}", file=print_target)

118
functionality/file_io.py Normal file
View File

@ -0,0 +1,118 @@
"""
This file contains functions for reading and writing different types of files.
Author(s): David Marchant
"""
import json
import yaml
from os import makedirs, remove, rmdir, walk
from os.path import exists, isfile, join
from typing import Any, Dict, List
from core.correctness.validation import valid_path
def make_dir(path:str, can_exist:bool=True, ensure_clean:bool=False):
    """
    Create a directory, along with any missing parent directories.

    :param path: (str) The directory path to create.

    :param can_exist: (boolean) [optional] Whether an already existing
    directory at the path is acceptable. Passed on to makedirs as exist_ok.
    Default is True (e.g. no error is raised if the path already exists).

    :param ensure_clean: (boolean) [optional] If True, anything already
    existing at the path is removed first so the resulting directory is
    empty. Default is False.

    :return: No return

    :raises ValueError: If the path already exists and is a file.
    """
    already_present = exists(path)

    # A file in the way can never be turned into a directory
    if already_present and isfile(path):
        raise ValueError(
            f"Cannot make directory in {path} as it already exists and is "
            "a file")

    # Clear out the old directory so that we start from an empty one
    if already_present and ensure_clean:
        rmtree(path)

    makedirs(path, exist_ok=can_exist)
def rmtree(directory:str):
    """
    Remove a directory and all its contents.

    The tree is walked bottom-up, so by the time a directory is reached all
    of its sub-directories have already been emptied and can be removed
    directly. Symbolic links found within the tree are unlinked rather than
    followed, so nothing outside of the given directory is ever deleted.

    :param directory: (str) The directory to empty and remove. If it does
    not exist then nothing is done.

    :return: No return
    """
    # Local import so that this function does not depend on any change to
    # the imports at the top of the file.
    from os.path import islink

    if not exists(directory):
        return

    for root, dirs, files in walk(directory, topdown=False):
        for file_name in files:
            remove(join(root, file_name))
        for dir_name in dirs:
            dir_path = join(root, dir_name)
            if islink(dir_path):
                # walk reports links to directories in dirs, but does not
                # descend into them. Only remove the link itself and leave
                # the target of the link untouched.
                remove(dir_path)
            else:
                # Already emptied, as the walk is bottom-up
                rmdir(dir_path)

    rmdir(directory)
def read_file(filepath:str):
    """
    Reads the entire contents of a text file.

    :param filepath: (str) The file to read.

    :return: (str) The contents of the file as a single string.
    """
    with open(filepath, 'r') as handle:
        contents = handle.read()
    return contents
def read_file_lines(filepath:str):
    """
    Reads a text file as a list of lines.

    :param filepath: (str) The file to read.

    :return: (List[str]) The lines of the file, each retaining its line
    ending.
    """
    with open(filepath, 'r') as handle:
        all_lines = handle.readlines()
    return all_lines
def write_file(source:str, filename:str):
    """
    Writes a string to a text file, replacing any existing contents.

    :param source: (str) The text to write.

    :param filename: (str) The file to write to.

    :return: No return
    """
    with open(filename, 'w') as destination:
        destination.write(source)
def read_yaml(filepath:str):
    """
    Reads a file path as a yaml object.

    :param filepath: (str) The file to read.

    :return: (object) An object read from the file.
    """
    # NOTE(review): yaml.Loader is the full loader and can construct
    # arbitrary Python objects. This is only appropriate for trusted files.
    # Consider yaml.SafeLoader if untrusted input can ever reach here.
    with open(filepath, 'r') as stream:
        loaded = yaml.load(stream, Loader=yaml.Loader)
    return loaded
def write_yaml(source:Any, filename:str):
    """
    Writes a given object to a yaml file.

    :param source: (any) A python object to be written.

    :param filename: (str) The filename to be written to.

    :return: No return
    """
    with open(filename, 'w') as destination:
        # Block style output, rather than inline flow style
        yaml.dump(source, destination, default_flow_style=False)
def read_notebook(filepath:str):
    """
    Reads a jupyter notebook file as a python object.

    :param filepath: (str) The notebook file to read. It is checked by
    valid_path with an 'ipynb' extension before being opened.

    :return: (object) The parsed JSON contents of the notebook file.
    """
    valid_path(filepath, extension="ipynb")
    with open(filepath, 'r') as notebook_file:
        raw_json = notebook_file.read()
    return json.loads(raw_json)
def write_notebook(source:Dict[str,Any], filename:str):
    """
    Writes the given notebook source code to a given filename, as JSON.

    :param source: (dict) The notebook source dictionary.

    :param filename: (str) The filename to write to.

    :return: No return
    """
    with open(filename, 'w') as destination:
        json.dump(source, destination)
def lines_to_string(lines:List[str])->str:
    """
    Combine a list of lines into a single string.

    :param lines: (List[str]) The lines to combine.

    :return: (str) All lines, in order, separated by newline characters.
    """
    separator = "\n"
    return separator.join(lines)

36
functionality/hashing.py Normal file
View File

@ -0,0 +1,36 @@
"""
This file contains functions for taking hashes of data and files.
Author(s): David Marchant
"""
from hashlib import sha256
from core.correctness.vars import HASH_BUFFER_SIZE, SHA256
from core.correctness.validation import check_type, valid_existing_file_path
def _get_file_sha256(file_path):
    """
    Calculate the SHA256 hash of a file.

    The file is read in chunks of HASH_BUFFER_SIZE bytes, so that it never
    needs to be held in memory all at once.

    :param file_path: The file to hash.

    :return: (str) The hexadecimal digest of the file contents.
    """
    digest = sha256()

    with open(file_path, 'rb') as stream:
        # An empty bytes object signals that the end of the file was reached
        for chunk in iter(lambda: stream.read(HASH_BUFFER_SIZE), b""):
            digest.update(chunk)

    return digest.hexdigest()
def get_file_hash(file_path:str, hash:str, hint:str=""):
    """
    Calculate the hash of a file, using the named hashing algorithm.

    :param file_path: (str) The file to hash. Checked with
    valid_existing_file_path before hashing.

    :param hash: (str) The name of the hashing algorithm to use. Currently
    only SHA256 is supported.

    :param hint: (str) [optional] Passed to check_type when validating the
    hash argument. Default is an empty string.

    :return: The hash of the file, as returned by the selected hashing
    function.

    :raises KeyError: If the requested hash is not a supported one.
    """
    check_type(hash, str, hint=hint)

    valid_existing_file_path(file_path)

    # Dispatch table of each supported algorithm to its implementation
    valid_hashes = {
        SHA256: _get_file_sha256
    }
    if hash not in valid_hashes:
        # Closing quote added so the list of valid hashes is quoted properly
        raise KeyError(f"Cannot use hash '{hash}'. Valid are "
            f"'{list(valid_hashes.keys())}'")

    return valid_hashes[hash](file_path)

103
functionality/meow.py Normal file
View File

@ -0,0 +1,103 @@
"""
This file contains functions for meow specific functionality.
Author(s): David Marchant
"""
from datetime import datetime
from os.path import basename, dirname, relpath, splitext
from typing import Any, Dict
from core.correctness.vars import EVENT_PATH, EVENT_RULE, EVENT_TYPE, \
EVENT_TYPE_WATCHDOG, JOB_CREATE_TIME, JOB_EVENT, JOB_ID, JOB_PATTERN, \
JOB_RECIPE, JOB_REQUIREMENTS, JOB_RULE, JOB_STATUS, JOB_TYPE, \
STATUS_QUEUED, WATCHDOG_BASE, WATCHDOG_HASH
from functionality.naming import generate_id
# mig trigger keyword replacements. Each of these placeholders is swapped for
# a concrete value by replace_keywords, wherever it appears in a str value.
# Replaced with the source path itself
KEYWORD_PATH = "{PATH}"
# Replaced with the source path, relative to the monitor base
KEYWORD_REL_PATH = "{REL_PATH}"
# Replaced with the directory name of the source path
KEYWORD_DIR = "{DIR}"
# Replaced with the directory name of the relative source path
KEYWORD_REL_DIR = "{REL_DIR}"
# Replaced with the base name of the source path
KEYWORD_FILENAME = "{FILENAME}"
# Replaced with the file name, without its extension
KEYWORD_PREFIX = "{PREFIX}"
# Replaced with the monitor base
KEYWORD_BASE = "{VGRID}"
# Replaced with the extension of the file name
KEYWORD_EXTENSION = "{EXTENSION}"
# Replaced with the job ID
KEYWORD_JOB = "{JOB}"
def replace_keywords(old_dict:Dict[str,str], job_id:str, src_path:str,
        monitor_base:str)->Dict[str,str]:
    """
    Replace all MEOW keywords in the values of a dictionary with values
    derived from the given job and path.

    :param old_dict: (Dict[str,str]) The dictionary to process. It is not
    modified.

    :param job_id: (str) The value substituted for the job keyword.

    :param src_path: (str) The path that path-based keywords are derived
    from.

    :param monitor_base: (str) The base directory. Substituted for the base
    keyword, and used to calculate the relative path keywords.

    :return: (Dict[str,str]) A new dictionary with the same keys. Any str
    values have had their keywords replaced, whilst any other values are
    carried over unchanged.
    """
    file_name = basename(src_path)
    relative_path = relpath(src_path, monitor_base)
    stem, suffix = splitext(file_name)

    # Substitutions are applied in exactly this order to each str value
    substitutions = [
        (KEYWORD_PATH, src_path),
        (KEYWORD_REL_PATH, relative_path),
        (KEYWORD_DIR, dirname(src_path)),
        (KEYWORD_REL_DIR, dirname(relative_path)),
        (KEYWORD_FILENAME, file_name),
        (KEYWORD_PREFIX, stem),
        (KEYWORD_BASE, monitor_base),
        (KEYWORD_EXTENSION, suffix),
        (KEYWORD_JOB, job_id),
    ]

    replaced = {}
    for key, value in old_dict.items():
        if isinstance(value, str):
            for keyword, substitute in substitutions:
                value = value.replace(keyword, substitute)
        replaced[key] = value

    return replaced
def create_event(event_type:str, path:str, rule:Any, extras:Dict[Any,Any]={}
        )->Dict[Any,Any]:
    """
    Create a MEOW event dictionary.

    :param event_type: (str) The type of the event.

    :param path: (str) The path the event relates to.

    :param rule: (Any) The rule associated with the event.

    :param extras: (Dict[Any,Any]) [optional] Additional entries to include
    in the event. These are never modified, and are overridden by the core
    event entries if their keys clash. Default is an empty dictionary.

    :return: (Dict[Any,Any]) The new event dictionary.
    """
    # Start from a copy of the extras, so the core entries always take
    # precedence and the caller's dictionary is left untouched
    event = {**extras}
    event.update({
        EVENT_PATH: path,
        EVENT_TYPE: event_type,
        EVENT_RULE: rule
    })
    return event
def create_watchdog_event(path:str, rule:Any, base:str, hash:str,
        extras:Dict[Any,Any]={})->Dict[Any,Any]:
    """
    Create a MEOW event dictionary for a watchdog event.

    :param path: (str) The path the event relates to.

    :param rule: (Any) The rule associated with the event.

    :param base: (str) Stored in the event under the watchdog base key.

    :param hash: (str) Stored in the event under the watchdog hash key.

    :param extras: (Dict[Any,Any]) [optional] Additional entries to include
    in the event. These are never modified, and are overridden by the
    watchdog entries if their keys clash. Default is an empty dictionary.

    :return: (Dict[Any,Any]) The new event dictionary.
    """
    watchdog_extras = {**extras}
    watchdog_extras[WATCHDOG_HASH] = hash
    watchdog_extras[WATCHDOG_BASE] = base

    return create_event(
        EVENT_TYPE_WATCHDOG,
        path,
        rule,
        extras=watchdog_extras
    )
def create_job(job_type:str, event:Dict[str,Any], extras:Dict[Any,Any]={}
        )->Dict[Any,Any]:
    """
    Create a MEOW job dictionary from a triggering event.

    :param job_type: (str) The type of the job.

    :param event: (Dict[str,Any]) The event that caused this job. The rule
    stored within it provides the pattern name, recipe name, rule name and
    recipe requirements of the job.

    :param extras: (Dict[Any,Any]) [optional] Additional entries to include
    in the job. These are never modified, and are overridden by the core job
    entries if their keys clash. Default is an empty dictionary.

    :return: (Dict[Any,Any]) The new job dictionary, with a freshly
    generated ID, a queued status and the current time as its creation time.
    """
    new_id = generate_id(prefix="job_")
    rule = event[EVENT_RULE]

    #TODO compress event?
    core_entries = {
        JOB_ID: new_id,
        JOB_EVENT: event,
        JOB_TYPE: job_type,
        JOB_PATTERN: rule.pattern.name,
        JOB_RECIPE: rule.recipe.name,
        JOB_RULE: rule.name,
        JOB_STATUS: STATUS_QUEUED,
        JOB_CREATE_TIME: datetime.now(),
        JOB_REQUIREMENTS: rule.recipe.requirements
    }

    # Core entries are applied last so that they cannot be overridden
    job = {**extras}
    job.update(core_entries)
    return job

23
functionality/naming.py Normal file
View File

@ -0,0 +1,23 @@
"""
This file contains functions for dynamic naming of objects.
Author(s): David Marchant
"""
from typing import List
from random import SystemRandom
from core.correctness.vars import CHAR_LOWERCASE, CHAR_UPPERCASE
#TODO Make this guaranteed unique
def generate_id(prefix:str="", length:int=16, existing_ids:List[str]=[],
        charset:str=CHAR_UPPERCASE+CHAR_LOWERCASE, attempts:int=24):
    """
    Generate a random ID that is not already in use.

    :param prefix: (str) [optional] A string each ID will start with. It
    counts towards the total length. Default is an empty string.

    :param length: (int) [optional] The total length of the ID, including
    the prefix. If the prefix is at least this long then the ID is just the
    prefix. Default is 16.

    :param existing_ids: (List[str]) [optional] IDs that must not be
    returned. It is never modified. Default is an empty list.

    :param charset: (str) [optional] The characters to pick from. Default is
    all uppercase and lowercase characters.

    :param attempts: (int) [optional] How many candidate IDs to try before
    giving up. Default is 24.

    :return: (str) The new ID.

    :raises ValueError: If no unused ID was found within the given number of
    attempts.
    """
    random_length = max(length - len(prefix), 0)
    generator = SystemRandom()

    for _ in range(attempts):
        suffix = ''.join(
            generator.choice(charset) for _ in range(random_length))
        candidate = prefix + suffix
        if candidate not in existing_ids:
            return candidate

    raise ValueError(f"Could not generate ID unique from '{existing_ids}' "
        f"using values '{charset}' and length of '{length}'.")

View File

@ -0,0 +1,121 @@
"""
This file contains functions for parameterising code in various formats.
Author(s): David Marchant
"""
from copy import deepcopy
from nbformat import validate
from os import getenv
from papermill.translators import papermill_translators
from typing import Any, Dict, List
from core.correctness.validation import check_script, check_type
# Adapted from: https://github.com/rasmunk/notebook_parameterizer
def parameterize_jupyter_notebook(jupyter_notebook:Dict[str,Any],
        parameters:Dict[str,Any], expand_env_values:bool=False)->Dict[str,Any]:
    """
    Create a copy of a jupyter notebook, with variable assignments in its
    code cells replaced by the given parameter values.

    A line is only replaced if it contains exactly one '=' and the text to
    the left of it, with spaces removed, is a key in parameters.

    :param jupyter_notebook: (Dict[str,Any]) The notebook to parameterise.
    It is not modified.

    :param parameters: (Dict[str,Any]) The variable names to replace, and
    the values to assign to them.

    :param expand_env_values: (boolean) [optional] If True, any str value
    starting with 'ENV_' is replaced by the value of the environment
    variable named by the rest of the string. Default is False.

    :return: (Dict[str,Any]) The parameterised copy of the notebook.

    :raises Warning: If the notebook is not nbformat version 4.

    :raises AttributeError: If the notebook metadata has neither a
    'language_info' nor a 'kernelspec' entry.
    """
    validate(jupyter_notebook)
    check_type(parameters, Dict,
        hint="parameterize_jupyter_notebook.parameters")

    # Note that this is raised rather than issued as a warning, so notebooks
    # of any other version are rejected outright.
    if jupyter_notebook["nbformat"] != 4:
        raise Warning(
            "Parameterization designed to work with nbformat version 4. "
            f"Differing version of '{jupyter_notebook['nbformat']}' may "
            "produce unexpeted results.")

    # Load input notebook. 'language_info' takes precedence if both entries
    # are present, with 'kernelspec' as the fallback. Only a notebook with
    # neither of them is rejected.
    metadata = jupyter_notebook["metadata"]
    if "language_info" in metadata:
        kernel_name = metadata["language_info"]["name"]
        language = metadata["language_info"]["name"]
    elif "kernelspec" in metadata:
        kernel_name = metadata["kernelspec"]["name"]
        language = metadata["kernelspec"]["language"]
    else:
        raise AttributeError(
            "Notebook lacks key language and/or kernel_name attributes "
            "within metadata")

    translator = papermill_translators.find_translator(kernel_name, language)

    output_notebook = deepcopy(jupyter_notebook)

    # Find each code cell, as only these can contain assignments
    cells = output_notebook["cells"]
    code_cells = [
        (idx, cell) for idx, cell in enumerate(cells)
        if cell["cell_type"] == "code"
    ]
    env_prefix = "ENV_"
    for idx, cell in code_cells:
        cell_updated = False
        source = cell["source"]
        # Either single string or a list of strings
        if isinstance(source, str):
            lines = source.split("\n")
        else:
            lines = source

        for idy, line in enumerate(lines):
            if "=" not in line:
                continue

            d_line = [part.replace(" ", "") for part in line.split("=")]
            # Matching parameter name
            if len(d_line) != 2 or d_line[0] not in parameters:
                continue

            value = parameters[d_line[0]]
            # Whether to expand value from os env
            if (
                expand_env_values
                and isinstance(value, str)
                and value.startswith(env_prefix)
            ):
                # Only strip the leading prefix. Any later occurrence of
                # 'ENV_' is part of the variable name itself.
                env_var = value[len(env_prefix):]
                value = getenv(
                    env_var,
                    "MISSING ENVIRONMENT VARIABLE: {}".format(env_var)
                )
            lines[idy] = translator.assign(
                d_line[0], translator.translate(value)
            )
            cell_updated = True

        if cell_updated:
            # NOTE(review): if source was a list whose lines already end in
            # newline characters then this join adds further blank lines.
            # Presumably harmless, but confirm this is acceptable.
            cells[idx]["source"] = "\n".join(lines)

    # Validate that the parameterized notebook is still valid
    validate(output_notebook, version=4)

    return output_notebook
def parameterize_python_script(script:List[str], parameters:Dict[str,Any],
        expand_env_values:bool=False)->List[str]:
    """
    Create a copy of a python script, with variable assignments replaced by
    the given parameter values.

    A line is only replaced if it contains exactly one '=' and the text to
    the left of it, with spaces removed, is a key in parameters. Replaced
    lines take the form '<name> = <repr of value>'.

    :param script: (List[str]) The lines of the script to parameterise. It
    is not modified. Checked by check_script before and after
    parameterisation.

    :param parameters: (Dict[str,Any]) The variable names to replace, and
    the values to assign to them.

    :param expand_env_values: (boolean) [optional] If True, any str value
    starting with 'ENV_' is replaced by the value of the environment
    variable named by the rest of the string. Default is False.

    :return: (List[str]) The lines of the parameterised copy of the script.
    """
    check_script(script)
    check_type(parameters, Dict
        ,hint="parameterize_python_script.parameters")

    output_script = deepcopy(script)

    env_prefix = "ENV_"
    for i, line in enumerate(output_script):
        if "=" not in line:
            continue

        d_line = [part.replace(" ", "") for part in line.split("=")]
        # Matching parameter name
        if len(d_line) != 2 or d_line[0] not in parameters:
            continue

        value = parameters[d_line[0]]
        # Whether to expand value from os env
        if (
            expand_env_values
            and isinstance(value, str)
            and value.startswith(env_prefix)
        ):
            # Only strip the leading prefix. Any later occurrence of 'ENV_'
            # is part of the variable name itself.
            env_var = value[len(env_prefix):]
            value = getenv(
                env_var,
                "MISSING ENVIRONMENT VARIABLE: {}".format(env_var)
            )
        output_script[i] = f"{d_line[0]} = {repr(value)}"

    # Validate that the parameterized script is still valid
    check_script(output_script)

    return output_script

View File

@ -0,0 +1,41 @@
"""
This file contains functions for reading and writing between processes.
Author(s): David Marchant
"""
from os import name as osName
from typing import List
from multiprocessing.connection import Connection, wait as multi_wait
# Need to import additional Connection type for Windows machines
if osName == 'nt':
from multiprocessing.connection import PipeConnection
from multiprocessing.queues import Queue
from core.correctness.vars import VALID_CHANNELS
def wait(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
    """
    Wait until at least one of the given channels is ready, using the
    implementation appropriate for the current operating system.

    :param inputs: (List[VALID_CHANNELS]) The channels to wait on.

    :return: (List[VALID_CHANNELS]) The channels that are ready.
    """
    # Windows ('nt') has an additional connection type to account for
    platform_wait = wait_windows if osName == 'nt' else wait_linux
    return platform_wait(inputs)
def wait_windows(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
    """
    Wait until at least one of the given channels is ready, on Windows.

    Only inputs that are exactly a Connection, PipeConnection or Queue are
    considered. Queues are waited on through their '_reader' attribute.

    :param inputs: (List[VALID_CHANNELS]) The channels to wait on.

    :return: (List[VALID_CHANNELS]) The inputs that are ready, in the same
    order that they were given in.
    """
    connections = []
    pipe_connections = []
    queue_readers = []
    for channel in inputs:
        channel_type = type(channel)
        if channel_type is Connection:
            connections.append(channel)
        if channel_type is PipeConnection:
            pipe_connections.append(channel)
        if channel_type is Queue:
            queue_readers.append(channel._reader)

    ready = multi_wait(connections + pipe_connections + queue_readers)

    def _is_ready(channel):
        # Queues were waited on via their reader, so check for that instead
        channel_type = type(channel)
        if channel_type is Connection or channel_type is PipeConnection:
            return channel in ready
        if channel_type is Queue:
            return channel._reader in ready
        return False

    return [channel for channel in inputs if _is_ready(channel)]
def wait_linux(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
    """
    Wait until at least one of the given channels is ready, on any
    non-Windows system.

    Only inputs that are exactly a Connection or Queue are considered.
    Queues are waited on through their '_reader' attribute.

    :param inputs: (List[VALID_CHANNELS]) The channels to wait on.

    :return: (List[VALID_CHANNELS]) The inputs that are ready, in the same
    order that they were given in.
    """
    connections = []
    queue_readers = []
    for channel in inputs:
        channel_type = type(channel)
        if channel_type is Connection:
            connections.append(channel)
        if channel_type is Queue:
            queue_readers.append(channel._reader)

    ready = multi_wait(connections + queue_readers)

    def _is_ready(channel):
        # Queues were waited on via their reader, so check for that instead
        channel_type = type(channel)
        if channel_type is Connection:
            return channel in ready
        if channel_type is Queue:
            return channel._reader in ready
        return False

    return [channel for channel in inputs if _is_ready(channel)]