split functionality file across new module as was getting too long on its own. All functions remian the same, but imports will need to be updated.

2023-02-10 14:46:46 +01:00
parent dfd83e28dc
commit 7f8b4f3e32
23 changed files with 540 additions and 464 deletions
--- a/functionality/debug.py
+++ b/functionality/debug.py
@@ -0,0 +1,42 @@
+"""
+This file contains functions for debugging and logging.
+
+Author(s): David Marchant
+"""
+
+from typing import Any, Tuple
+
+from core.correctness.validation import check_type
+from core.correctness.vars import DEBUG_INFO, DEBUG_WARNING
+
+
+def setup_debugging(print:Any=None, logging:int=0)->Tuple[Any,int]:
+    """Create a place for debug messages to be sent. Always returns a place, 
+    along with a logging level."""
+    check_type(logging, int)
+    if print is None:
+        return None, 0
+    else:
+        if not isinstance(print, object):
+            raise TypeError(f"Invalid print location provided")
+        writeable = getattr(print, "write", None)
+        if not writeable or not callable(writeable):
+            raise TypeError(f"Print object does not implement required "
+                "'write' function")
+
+    return print, logging
+
+
+def print_debug(print_target, debug_level, msg, level)->None:
+    """Function to print a message to the debug target, if its level exceeds 
+    the given one."""
+    if print_target is None:
+        return
+    else:
+        if level <= debug_level:
+            status = "ERROR"
+            if level == DEBUG_INFO:
+                status = "INFO"
+            elif level == DEBUG_WARNING:
+                status = "WARNING"
+            print(f"{status}: {msg}", file=print_target)
--- a/functionality/file_io.py
+++ b/functionality/file_io.py
@@ -0,0 +1,118 @@
+"""
+This file contains functions for reading and writing different types of files.
+
+Author(s): David Marchant
+"""
+
+import json
+import yaml
+
+from os import makedirs, remove, rmdir, walk
+from os.path import exists, isfile, join
+from typing import Any, Dict, List
+
+from core.correctness.validation import valid_path
+
+
+def make_dir(path:str, can_exist:bool=True, ensure_clean:bool=False):
+    """
+    Creates a new directory at the given path.
+
+    :param path: (str) The directory path.
+
+    :param can_exist: (boolean) [optional] A toggle for if a previously
+    existing directory at the path will throw an error or not. Default is
+    true (e.g. no error is thrown if the path already exists)
+
+    :param ensure_clean: (boolean) [optional] A toggle for if a previously
+    existing directory at the path will be replaced with a new emtpy directory.
+    Default is False.
+
+    :return: No return
+    """
+    if exists(path):
+        if isfile(path):
+            raise ValueError(
+                f"Cannot make directory in {path} as it already exists and is "
+                "a file")
+        if ensure_clean:
+            rmtree(path)
+                
+    makedirs(path, exist_ok=can_exist)
+
+def rmtree(directory:str):
+    """
+    Remove a directory and all its contents. 
+    Should be faster than shutil.rmtree
+    
+    :param: (str) The firectory to empty and remove
+
+    :return: No return
+    """
+    if not exists(directory):
+        return
+    for root, dirs, files in walk(directory, topdown=False):
+        for file in files:
+            remove(join(root, file))
+        for dir in dirs:
+            rmtree(join(root, dir))
+    rmdir(directory)
+
+def read_file(filepath:str):
+    with open(filepath, 'r') as file:
+        return file.read()
+
+def read_file_lines(filepath:str):
+    with open(filepath, 'r') as file:
+        return file.readlines()
+
+def write_file(source:str, filename:str):
+    with open(filename, 'w') as file:
+        file.write(source)
+
+def read_yaml(filepath:str):
+    """
+    Reads a file path as a yaml object.
+
+    :param filepath: (str) The file to read.
+
+    :return: (object) An object read from the file.
+    """
+    with open(filepath, 'r') as yaml_file:
+        return yaml.load(yaml_file, Loader=yaml.Loader)
+
+def write_yaml(source:Any, filename:str):
+    """
+    Writes a given objcet to a yaml file.
+
+    :param source: (any) A python object to be written.
+
+    :param filename: (str) The filename to be written to.
+
+    :return: No return
+    """
+    with open(filename, 'w') as param_file:
+        yaml.dump(source, param_file, default_flow_style=False)
+
+def read_notebook(filepath:str):
+    valid_path(filepath, extension="ipynb")
+    with open(filepath, 'r') as read_file:
+        return json.load(read_file)
+
+def write_notebook(source:Dict[str,Any], filename:str):
+    """
+    Writes the given notebook source code to a given filename.
+
+    :param source: (dict) The notebook source dictionary.
+
+    :param filename: (str) The filename to write to.
+
+    :return: No return
+    """
+    with open(filename, 'w') as job_file:
+        json.dump(source, job_file)
+
+def lines_to_string(lines:List[str])->str:
+    """Function to convert a list of str lines, into one continuous string 
+    separated by newline characters"""
+    return "\n".join(lines)
--- a/functionality/hashing.py
+++ b/functionality/hashing.py
@@ -0,0 +1,36 @@
+"""
+This file contains functions for taking hashes of data and files.
+
+Author(s): David Marchant
+"""
+
+from hashlib import sha256
+
+from core.correctness.vars import HASH_BUFFER_SIZE, SHA256
+from core.correctness.validation import check_type, valid_existing_file_path
+
+def _get_file_sha256(file_path):
+    sha256_hash = sha256()
+    
+    with open(file_path, 'rb') as file_to_hash:
+        while True:
+            buffer = file_to_hash.read(HASH_BUFFER_SIZE)
+            if not buffer:
+                break
+            sha256_hash.update(buffer)
+    
+    return sha256_hash.hexdigest()
+
+def get_file_hash(file_path:str, hash:str, hint:str=""):
+    check_type(hash, str, hint=hint)
+
+    valid_existing_file_path(file_path)
+
+    valid_hashes = {
+        SHA256: _get_file_sha256
+    }
+    if hash not in valid_hashes:
+        raise KeyError(f"Cannot use hash '{hash}'. Valid are "
+            f"'{list(valid_hashes.keys())}")
+
+    return valid_hashes[hash](file_path)
--- a/functionality/meow.py
+++ b/functionality/meow.py
@@ -0,0 +1,103 @@
+"""
+This file contains functions for meow specific functionality.
+
+Author(s): David Marchant
+"""
+
+from datetime import datetime
+from os.path import basename, dirname, relpath, splitext
+from typing import Any, Dict
+
+from core.correctness.vars import EVENT_PATH, EVENT_RULE, EVENT_TYPE, \
+    EVENT_TYPE_WATCHDOG, JOB_CREATE_TIME, JOB_EVENT, JOB_ID, JOB_PATTERN, \
+    JOB_RECIPE, JOB_REQUIREMENTS, JOB_RULE, JOB_STATUS, JOB_TYPE, \
+    STATUS_QUEUED, WATCHDOG_BASE, WATCHDOG_HASH
+from functionality.naming import generate_id
+
+
+# mig trigger keyword replacements
+KEYWORD_PATH = "{PATH}"
+KEYWORD_REL_PATH = "{REL_PATH}"
+KEYWORD_DIR = "{DIR}"
+KEYWORD_REL_DIR = "{REL_DIR}"
+KEYWORD_FILENAME = "{FILENAME}"
+KEYWORD_PREFIX = "{PREFIX}"
+KEYWORD_BASE = "{VGRID}"
+KEYWORD_EXTENSION = "{EXTENSION}"
+KEYWORD_JOB = "{JOB}"
+
+
+def replace_keywords(old_dict:Dict[str,str], job_id:str, src_path:str, 
+            monitor_base:str)->Dict[str,str]:
+    """Function to replace all MEOW magic words in a dictionary with dynamic 
+    values."""
+    new_dict = {}
+
+    filename = basename(src_path)
+    dir = dirname(src_path)
+    relativepath = relpath(src_path, monitor_base)
+    reldirname = dirname(relativepath)
+    (prefix, extension) = splitext(filename)
+
+    for var, val in old_dict.items():
+        if isinstance(val, str):
+            val = val.replace(KEYWORD_PATH, src_path)
+            val = val.replace(KEYWORD_REL_PATH, relativepath)
+            val = val.replace(KEYWORD_DIR, dir)
+            val = val.replace(KEYWORD_REL_DIR, reldirname)
+            val = val.replace(KEYWORD_FILENAME, filename)
+            val = val.replace(KEYWORD_PREFIX, prefix)
+            val = val.replace(KEYWORD_BASE, monitor_base)
+            val = val.replace(KEYWORD_EXTENSION, extension)
+            val = val.replace(KEYWORD_JOB, job_id)
+
+            new_dict[var] = val
+        else:
+            new_dict[var] = val
+
+    return new_dict
+
+def create_event(event_type:str, path:str, rule:Any, extras:Dict[Any,Any]={}
+        )->Dict[Any,Any]:
+    """Function to create a MEOW dictionary."""
+    return {
+        **extras, 
+        EVENT_PATH: path, 
+        EVENT_TYPE: event_type, 
+        EVENT_RULE: rule
+    }
+
+def create_watchdog_event(path:str, rule:Any, base:str, hash:str, 
+            extras:Dict[Any,Any]={})->Dict[Any,Any]:
+    """Function to create a MEOW event dictionary."""
+    return create_event(
+        EVENT_TYPE_WATCHDOG, 
+        path, 
+        rule,
+        extras={
+            **extras,
+            **{
+                WATCHDOG_HASH: hash,
+                WATCHDOG_BASE: base
+            }
+        }
+    )
+
+def create_job(job_type:str, event:Dict[str,Any], extras:Dict[Any,Any]={}
+        )->Dict[Any,Any]:
+    """Function to create a MEOW job dictionary."""
+    job_dict = {
+        #TODO compress event?
+        JOB_ID: generate_id(prefix="job_"),
+        JOB_EVENT: event,
+        JOB_TYPE: job_type,
+        JOB_PATTERN: event[EVENT_RULE].pattern.name,
+        JOB_RECIPE: event[EVENT_RULE].recipe.name,
+        JOB_RULE: event[EVENT_RULE].name,
+        JOB_STATUS: STATUS_QUEUED,
+        JOB_CREATE_TIME: datetime.now(),
+        JOB_REQUIREMENTS: event[EVENT_RULE].recipe.requirements
+    }
+
+    return {**extras, **job_dict}
+
--- a/functionality/naming.py
+++ b/functionality/naming.py
@@ -0,0 +1,23 @@
+"""
+This file contains functions for dynamic naming of objects.
+
+Author(s): David Marchant
+"""
+
+from typing import List
+from random import SystemRandom
+
+from core.correctness.vars import CHAR_LOWERCASE, CHAR_UPPERCASE
+
+
+#TODO Make this guaranteed unique
+def generate_id(prefix:str="", length:int=16, existing_ids:List[str]=[], 
+        charset:str=CHAR_UPPERCASE+CHAR_LOWERCASE, attempts:int=24):
+    random_length = max(length - len(prefix), 0)
+    for _ in range(attempts):
+        id = prefix + ''.join(SystemRandom().choice(charset) 
+            for _ in range(random_length))
+        if id not in existing_ids:
+            return id
+    raise ValueError(f"Could not generate ID unique from '{existing_ids}' "
+        f"using values '{charset}' and length of '{length}'.")
--- a/functionality/parameterisation.py
+++ b/functionality/parameterisation.py
@@ -0,0 +1,121 @@
+"""
+This file contains functions for parameterising code in various formats.
+
+Author(s): David Marchant
+"""
+
+from copy import deepcopy
+from nbformat import validate
+from os import getenv
+from papermill.translators import papermill_translators
+from typing import Any, Dict, List
+
+from core.correctness.validation import check_script, check_type
+
+# Adapted from: https://github.com/rasmunk/notebook_parameterizer
+def parameterize_jupyter_notebook(jupyter_notebook:Dict[str,Any], 
+        parameters:Dict[str,Any], expand_env_values:bool=False)->Dict[str,Any]:
+    validate(jupyter_notebook)
+    check_type(parameters, Dict, 
+        hint="parameterize_jupyter_notebook.parameters")
+
+    if jupyter_notebook["nbformat"] != 4:
+        raise Warning(
+            "Parameterization designed to work with nbformat version 4. "
+            f"Differing version of '{jupyter_notebook['nbformat']}' may "
+            "produce unexpeted results.")
+
+    # Load input notebook
+    if "kernelspec" in jupyter_notebook["metadata"]:
+        kernel_name = jupyter_notebook["metadata"]["kernelspec"]["name"]
+        language = jupyter_notebook["metadata"]["kernelspec"]["language"]
+    if "language_info" in jupyter_notebook["metadata"]:
+        kernel_name = jupyter_notebook["metadata"]["language_info"]["name"]
+        language = jupyter_notebook["metadata"]["language_info"]["name"]
+    else:
+        raise AttributeError(
+            f"Notebook lacks key language and/or kernel_name attributes "
+            "within metadata")
+
+    translator = papermill_translators.find_translator(kernel_name, language)
+
+    output_notebook = deepcopy(jupyter_notebook)
+
+    # Find each
+    cells = output_notebook["cells"]
+    code_cells = [
+        (idx, cell) for idx, cell in enumerate(cells) \
+            if cell["cell_type"] == "code"
+    ]
+    for idx, cell in code_cells:
+        cell_updated = False
+        source = cell["source"]
+        # Either single string or a list of strings
+        if isinstance(source, str):
+            lines = source.split("\n")
+        else:
+            lines = source
+
+        for idy, line in enumerate(lines):
+            if "=" in line:
+                d_line = list(map(lambda x: x.replace(" ", ""), 
+                    line.split("=")))
+                # Matching parameter name
+                if len(d_line) == 2 and d_line[0] in parameters:
+                    value = parameters[d_line[0]]
+                    # Whether to expand value from os env
+                    if (
+                        expand_env_values
+                        and isinstance(value, str)
+                        and value.startswith("ENV_")
+                    ):
+                        env_var = value.replace("ENV_", "")
+                        value = getenv(
+                            env_var, 
+                            "MISSING ENVIRONMENT VARIABLE: {}".format(env_var)
+                        )
+                    lines[idy] = translator.assign(
+                        d_line[0], translator.translate(value)
+                    )
+
+                    cell_updated = True
+        if cell_updated:
+            cells[idx]["source"] = "\n".join(lines)
+
+    # Validate that the parameterized notebook is still valid
+    validate(output_notebook, version=4)
+
+    return output_notebook
+
+def parameterize_python_script(script:List[str], parameters:Dict[str,Any], 
+        expand_env_values:bool=False)->Dict[str,Any]:
+    check_script(script)
+    check_type(parameters, Dict
+        ,hint="parameterize_python_script.parameters")
+
+    output_script = deepcopy(script)
+
+    for i, line in enumerate(output_script):
+        if "=" in line:
+            d_line = list(map(lambda x: x.replace(" ", ""), 
+                line.split("=")))
+            # Matching parameter name
+            if len(d_line) == 2 and d_line[0] in parameters:
+                value = parameters[d_line[0]]
+                # Whether to expand value from os env
+                if (
+                    expand_env_values
+                    and isinstance(value, str)
+                    and value.startswith("ENV_")
+                ):
+                    env_var = value.replace("ENV_", "")
+                    value = getenv(
+                        env_var, 
+                        "MISSING ENVIRONMENT VARIABLE: {}".format(env_var)
+                    )
+                output_script[i] = f"{d_line[0]} = {repr(value)}"
+                
+    # Validate that the parameterized notebook is still valid
+    check_script(output_script)
+
+    return output_script
--- a/functionality/process_io.py
+++ b/functionality/process_io.py
@@ -0,0 +1,41 @@
+"""
+This file contains functions for reading and writing between processes.
+
+Author(s): David Marchant
+"""
+
+from os import name as osName
+from typing import List
+
+from multiprocessing.connection import Connection, wait as multi_wait
+# Need to import additional Connection type for Windows machines
+if osName == 'nt':
+    from multiprocessing.connection import PipeConnection
+from multiprocessing.queues import Queue
+from core.correctness.vars import VALID_CHANNELS
+
+
+def wait(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
+    if osName == 'nt':
+        return wait_windows(inputs)
+    return wait_linux(inputs)
+
+def wait_windows(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
+    all_connections = [i for i in inputs if type(i) is Connection] \
+        + [i for i in inputs if type(i) is PipeConnection] \
+        + [i._reader for i in inputs if type(i) is Queue]
+    ready = multi_wait(all_connections)
+    ready_inputs = [i for i in inputs if \
+        (type(i) is Connection and i in ready) \
+        or (type(i) is PipeConnection and i in ready) \
+        or (type(i) is Queue and i._reader in ready)]
+    return ready_inputs
+
+def wait_linux(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
+    all_connections = [i for i in inputs if type(i) is Connection] \
+        + [i._reader for i in inputs if type(i) is Queue]
+    ready = multi_wait(all_connections)
+    ready_inputs = [i for i in inputs if \
+        (type(i) is Connection and i in ready) \
+        or (type(i) is Queue and i._reader in ready)]
+    return ready_inputs