Split the functionality file across a new module, as it was getting too long on its own. All functions remain the same, but imports will need to be updated.

2023-02-10 14:46:46 +01:00
parent dfd83e28dc
commit 7f8b4f3e32
23 changed files with 540 additions and 464 deletions
--- a/core/correctness/init.py
+++ b/core/correctness/init.py
@@ -1,3 +0,0 @@
-
-from core.correctness.validation import *
-from core.correctness.vars import *
--- a/core/correctness/validation.py
+++ b/core/correctness/validation.py
@@ -8,7 +8,7 @@ Author(s): David Marchant
 from datetime import datetime
 from inspect import signature
 from os.path import sep, exists, isfile, isdir, dirname
-from typing import Any, _SpecialForm, Union, Tuple, Type, Dict, List, \
+from typing import Any, _SpecialForm, Union, Type, Dict, List, \
    get_origin, get_args

 from core.correctness.vars import VALID_PATH_CHARS, get_not_imp_msg, \
@@ -248,22 +248,6 @@ def valid_non_existing_path(variable:str, allow_base:bool=False):
        raise ValueError(
            f"Route to requested path '{variable}' does not exist.")

-def setup_debugging(print:Any=None, logging:int=0)->Tuple[Any,int]:
-    """Create a place for debug messages to be sent. Always returns a place, 
-    along with a logging level."""
-    check_type(logging, int)
-    if print is None:
-        return None, 0
-    else:
-        if not isinstance(print, object):
-            raise TypeError(f"Invalid print location provided")
-        writeable = getattr(print, "write", None)
-        if not writeable or not callable(writeable):
-            raise TypeError(f"Print object does not implement required "
-                "'write' function")
-
-    return print, logging
-
 def valid_meow_dict(meow_dict:Dict[str,Any], msg:str, 
        keys:Dict[str,Type])->None:
    """Check given dictionary expresses a meow construct. This won't do much 
--- a/core/functionality.py
+++ b/core/functionality.py
@@ -1,402 +0,0 @@
-# TODO comments
-import copy
-import hashlib
-import json
-import nbformat
-import os
-import yaml
-
-from datetime import datetime
-from typing import List
-
-from multiprocessing.connection import Connection, wait as multi_wait
-# Need to import additional Connection type for Windows machines
-if os.name == 'nt':
-    from multiprocessing.connection import PipeConnection
-from multiprocessing.queues import Queue
-from papermill.translators import papermill_translators
-from typing import Any, Dict
-from random import SystemRandom
-
-from core.correctness.validation import check_type, valid_existing_file_path, \
-    valid_path, check_script
-from core.correctness.vars import CHAR_LOWERCASE, CHAR_UPPERCASE, \
-    VALID_CHANNELS, HASH_BUFFER_SIZE, SHA256, DEBUG_WARNING, DEBUG_INFO, \
-    EVENT_TYPE, EVENT_PATH, JOB_EVENT, JOB_TYPE, JOB_ID, JOB_PATTERN, \
-    JOB_RECIPE, JOB_RULE, EVENT_RULE, JOB_STATUS, STATUS_QUEUED, \
-    JOB_CREATE_TIME, JOB_REQUIREMENTS, WATCHDOG_BASE, WATCHDOG_HASH, \
-    EVENT_TYPE_WATCHDOG
-
-# mig trigger keyword replacements
-KEYWORD_PATH = "{PATH}"
-KEYWORD_REL_PATH = "{REL_PATH}"
-KEYWORD_DIR = "{DIR}"
-KEYWORD_REL_DIR = "{REL_DIR}"
-KEYWORD_FILENAME = "{FILENAME}"
-KEYWORD_PREFIX = "{PREFIX}"
-KEYWORD_BASE = "{VGRID}"
-KEYWORD_EXTENSION = "{EXTENSION}"
-KEYWORD_JOB = "{JOB}"
-
-
-#TODO Make this guaranteed unique
-def generate_id(prefix:str="", length:int=16, existing_ids:List[str]=[], 
-        charset:str=CHAR_UPPERCASE+CHAR_LOWERCASE, attempts:int=24):
-    random_length = max(length - len(prefix), 0)
-    for _ in range(attempts):
-        id = prefix + ''.join(SystemRandom().choice(charset) 
-            for _ in range(random_length))
-        if id not in existing_ids:
-            return id
-    raise ValueError(f"Could not generate ID unique from '{existing_ids}' "
-        f"using values '{charset}' and length of '{length}'.")
-
-def wait(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
-    if os.name == 'nt':
-        return wait_windows(inputs)
-    return wait_linux(inputs)
-
-def wait_windows(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
-    all_connections = [i for i in inputs if type(i) is Connection] \
-        + [i for i in inputs if type(i) is PipeConnection] \
-        + [i._reader for i in inputs if type(i) is Queue]
-    ready = multi_wait(all_connections)
-    ready_inputs = [i for i in inputs if \
-        (type(i) is Connection and i in ready) \
-        or (type(i) is PipeConnection and i in ready) \
-        or (type(i) is Queue and i._reader in ready)]
-    return ready_inputs
-
-def wait_linux(inputs:List[VALID_CHANNELS])->List[VALID_CHANNELS]:
-    all_connections = [i for i in inputs if type(i) is Connection] \
-        + [i._reader for i in inputs if type(i) is Queue]
-    ready = multi_wait(all_connections)
-    ready_inputs = [i for i in inputs if \
-        (type(i) is Connection and i in ready) \
-        or (type(i) is Queue and i._reader in ready)]
-    return ready_inputs
-
-def _get_file_sha256(file_path):
-    sha256_hash = hashlib.sha256()
-    
-    with open(file_path, 'rb') as file_to_hash:
-        while True:
-            buffer = file_to_hash.read(HASH_BUFFER_SIZE)
-            if not buffer:
-                break
-            sha256_hash.update(buffer)
-    
-    return sha256_hash.hexdigest()
-
-def get_file_hash(file_path:str, hash:str, hint:str=""):
-    check_type(hash, str, hint=hint)
-
-    valid_existing_file_path(file_path)
-
-    valid_hashes = {
-        SHA256: _get_file_sha256
-    }
-    if hash not in valid_hashes:
-        raise KeyError(f"Cannot use hash '{hash}'. Valid are "
-            f"'{list(valid_hashes.keys())}")
-
-    return valid_hashes[hash](file_path)
-
-def rmtree(directory:str):
-    """
-    Remove a directory and all its contents. 
-    Should be faster than shutil.rmtree
-    
-    :param: (str) The firectory to empty and remove
-
-    :return: No return
-    """
-    if not os.path.exists(directory):
-        return
-    for root, dirs, files in os.walk(directory, topdown=False):
-        for file in files:
-            os.remove(os.path.join(root, file))
-        for dir in dirs:
-            rmtree(os.path.join(root, dir))
-    os.rmdir(directory)
-
-def make_dir(path:str, can_exist:bool=True, ensure_clean:bool=False):
-    """
-    Creates a new directory at the given path.
-
-    :param path: (str) The directory path.
-
-    :param can_exist: (boolean) [optional] A toggle for if a previously
-    existing directory at the path will throw an error or not. Default is
-    true (e.g. no error is thrown if the path already exists)
-
-    :param ensure_clean: (boolean) [optional] A toggle for if a previously
-    existing directory at the path will be replaced with a new emtpy directory.
-    Default is False.
-
-    :return: No return
-    """
-    if os.path.exists(path):
-        if os.path.isfile(path):
-            raise ValueError(
-                f"Cannot make directory in {path} as it already exists and is "
-                "a file")
-        if ensure_clean:
-            rmtree(path)
-                
-    os.makedirs(path, exist_ok=can_exist)
-    
-def read_file(filepath:str):
-    with open(filepath, 'r') as file:
-        return file.read()
-
-def read_file_lines(filepath:str):
-    with open(filepath, 'r') as file:
-        return file.readlines()
-
-def write_file(source:str, filename:str):
-    with open(filename, 'w') as file:
-        file.write(source)
-
-def read_yaml(filepath:str):
-    """
-    Reads a file path as a yaml object.
-
-    :param filepath: (str) The file to read.
-
-    :return: (object) An object read from the file.
-    """
-    with open(filepath, 'r') as yaml_file:
-        return yaml.load(yaml_file, Loader=yaml.Loader)
-
-def write_yaml(source:Any, filename:str):
-    """
-    Writes a given objcet to a yaml file.
-
-    :param source: (any) A python object to be written.
-
-    :param filename: (str) The filename to be written to.
-
-    :return: No return
-    """
-    with open(filename, 'w') as param_file:
-        yaml.dump(source, param_file, default_flow_style=False)
-
-def read_notebook(filepath:str):
-    valid_path(filepath, extension="ipynb")
-    with open(filepath, 'r') as read_file:
-        return json.load(read_file)
-
-def write_notebook(source:Dict[str,Any], filename:str):
-    """
-    Writes the given notebook source code to a given filename.
-
-    :param source: (dict) The notebook source dictionary.
-
-    :param filename: (str) The filename to write to.
-
-    :return: No return
-    """
-    with open(filename, 'w') as job_file:
-        json.dump(source, job_file)
-
-# Adapted from: https://github.com/rasmunk/notebook_parameterizer
-def parameterize_jupyter_notebook(jupyter_notebook:Dict[str,Any], 
-        parameters:Dict[str,Any], expand_env_values:bool=False)->Dict[str,Any]:
-    nbformat.validate(jupyter_notebook)
-    check_type(parameters, Dict, 
-        hint="parameterize_jupyter_notebook.parameters")
-
-    if jupyter_notebook["nbformat"] != 4:
-        raise Warning(
-            "Parameterization designed to work with nbformat version 4. "
-            f"Differing version of '{jupyter_notebook['nbformat']}' may "
-            "produce unexpeted results.")
-
-    # Load input notebook
-    if "kernelspec" in jupyter_notebook["metadata"]:
-        kernel_name = jupyter_notebook["metadata"]["kernelspec"]["name"]
-        language = jupyter_notebook["metadata"]["kernelspec"]["language"]
-    if "language_info" in jupyter_notebook["metadata"]:
-        kernel_name = jupyter_notebook["metadata"]["language_info"]["name"]
-        language = jupyter_notebook["metadata"]["language_info"]["name"]
-    else:
-        raise AttributeError(
-            f"Notebook lacks key language and/or kernel_name attributes "
-            "within metadata")
-
-    translator = papermill_translators.find_translator(kernel_name, language)
-
-    output_notebook = copy.deepcopy(jupyter_notebook)
-
-    # Find each
-    cells = output_notebook["cells"]
-    code_cells = [
-        (idx, cell) for idx, cell in enumerate(cells) \
-            if cell["cell_type"] == "code"
-    ]
-    for idx, cell in code_cells:
-        cell_updated = False
-        source = cell["source"]
-        # Either single string or a list of strings
-        if isinstance(source, str):
-            lines = source.split("\n")
-        else:
-            lines = source
-
-        for idy, line in enumerate(lines):
-            if "=" in line:
-                d_line = list(map(lambda x: x.replace(" ", ""), 
-                    line.split("=")))
-                # Matching parameter name
-                if len(d_line) == 2 and d_line[0] in parameters:
-                    value = parameters[d_line[0]]
-                    # Whether to expand value from os env
-                    if (
-                        expand_env_values
-                        and isinstance(value, str)
-                        and value.startswith("ENV_")
-                    ):
-                        env_var = value.replace("ENV_", "")
-                        value = os.getenv(
-                            env_var, 
-                            "MISSING ENVIRONMENT VARIABLE: {}".format(env_var)
-                        )
-                    lines[idy] = translator.assign(
-                        d_line[0], translator.translate(value)
-                    )
-
-                    cell_updated = True
-        if cell_updated:
-            cells[idx]["source"] = "\n".join(lines)
-
-    # Validate that the parameterized notebook is still valid
-    nbformat.validate(output_notebook, version=4)
-
-    return output_notebook
-
-def parameterize_python_script(script:List[str], parameters:Dict[str,Any], 
-        expand_env_values:bool=False)->Dict[str,Any]:
-    check_script(script)
-    check_type(parameters, Dict
-        ,hint="parameterize_python_script.parameters")
-
-    output_script = copy.deepcopy(script)
-
-    for i, line in enumerate(output_script):
-        if "=" in line:
-            d_line = list(map(lambda x: x.replace(" ", ""), 
-                line.split("=")))
-            # Matching parameter name
-            if len(d_line) == 2 and d_line[0] in parameters:
-                value = parameters[d_line[0]]
-                # Whether to expand value from os env
-                if (
-                    expand_env_values
-                    and isinstance(value, str)
-                    and value.startswith("ENV_")
-                ):
-                    env_var = value.replace("ENV_", "")
-                    value = os.getenv(
-                        env_var, 
-                        "MISSING ENVIRONMENT VARIABLE: {}".format(env_var)
-                    )
-                output_script[i] = f"{d_line[0]} = {repr(value)}"
-                
-    # Validate that the parameterized notebook is still valid
-    check_script(output_script)
-
-    return output_script
-
-def print_debug(print_target, debug_level, msg, level)->None:
-    """Function to print a message to the debug target, if its level exceeds 
-    the given one."""
-    if print_target is None:
-        return
-    else:
-        if level <= debug_level:
-            status = "ERROR"
-            if level == DEBUG_INFO:
-                status = "INFO"
-            elif level == DEBUG_WARNING:
-                status = "WARNING"
-            print(f"{status}: {msg}", file=print_target)
-
-def replace_keywords(old_dict:Dict[str,str], job_id:str, src_path:str, 
-            monitor_base:str)->Dict[str,str]:
-    """Function to replace all MEOW magic words in a dictionary with dynamic 
-    values."""
-    new_dict = {}
-
-    filename = os.path.basename(src_path)
-    dirname = os.path.dirname(src_path)
-    relpath = os.path.relpath(src_path, monitor_base)
-    reldirname = os.path.dirname(relpath)
-    (prefix, extension) = os.path.splitext(filename)
-
-    for var, val in old_dict.items():
-        if isinstance(val, str):
-            val = val.replace(KEYWORD_PATH, src_path)
-            val = val.replace(KEYWORD_REL_PATH, relpath)
-            val = val.replace(KEYWORD_DIR, dirname)
-            val = val.replace(KEYWORD_REL_DIR, reldirname)
-            val = val.replace(KEYWORD_FILENAME, filename)
-            val = val.replace(KEYWORD_PREFIX, prefix)
-            val = val.replace(KEYWORD_BASE, monitor_base)
-            val = val.replace(KEYWORD_EXTENSION, extension)
-            val = val.replace(KEYWORD_JOB, job_id)
-
-            new_dict[var] = val
-        else:
-            new_dict[var] = val
-
-    return new_dict
-
-def create_event(event_type:str, path:str, rule:Any, extras:Dict[Any,Any]={}
-        )->Dict[Any,Any]:
-    """Function to create a MEOW dictionary."""
-    return {
-        **extras, 
-        EVENT_PATH: path, 
-        EVENT_TYPE: event_type, 
-        EVENT_RULE: rule
-    }
-
-def create_watchdog_event(path:str, rule:Any, base:str, hash:str, 
-            extras:Dict[Any,Any]={})->Dict[Any,Any]:
-    """Function to create a MEOW event dictionary."""
-    return create_event(
-        EVENT_TYPE_WATCHDOG, 
-        path, 
-        rule,
-        extras={
-            **extras,
-            **{
-                WATCHDOG_HASH: hash,
-                WATCHDOG_BASE: base
-            }
-        }
-    )
-
-def create_job(job_type:str, event:Dict[str,Any], extras:Dict[Any,Any]={}
-        )->Dict[Any,Any]:
-    """Function to create a MEOW job dictionary."""
-    job_dict = {
-        #TODO compress event?
-        JOB_ID: generate_id(prefix="job_"),
-        JOB_EVENT: event,
-        JOB_TYPE: job_type,
-        JOB_PATTERN: event[EVENT_RULE].pattern.name,
-        JOB_RECIPE: event[EVENT_RULE].recipe.name,
-        JOB_RULE: event[EVENT_RULE].name,
-        JOB_STATUS: STATUS_QUEUED,
-        JOB_CREATE_TIME: datetime.now(),
-        JOB_REQUIREMENTS: event[EVENT_RULE].recipe.requirements
-    }
-
-    return {**extras, **job_dict}
-
-def lines_to_string(lines:List[str])->str:
-    """Function to convert a list of str lines, into one continuous string 
-    separated by newline characters"""
-    return "\n".join(lines)
--- a/core/meow.py
+++ b/core/meow.py
@@ -19,7 +19,7 @@ from core.correctness.vars import VALID_RECIPE_NAME_CHARS, \
    SWEEP_JUMP, SWEEP_START, SWEEP_STOP, get_drt_imp_msg
 from core.correctness.validation import valid_string, check_type, \
    check_implementation, valid_list, valid_dict
-from core.functionality import generate_id
+from functionality.naming import generate_id


 class BaseRecipe:
--- a/core/runner.py
+++ b/core/runner.py
@@ -15,12 +15,12 @@ from random import randrange
 from typing import Any, Union, Dict, List

 from core.correctness.vars import DEBUG_WARNING, DEBUG_INFO, EVENT_TYPE, \
-    VALID_CHANNELS, JOB_ID, META_FILE, DEFAULT_JOB_OUTPUT_DIR, \
-    DEFAULT_JOB_QUEUE_DIR
-from core.correctness.validation import setup_debugging, check_type, \
-    valid_list, valid_dir_path
-from core.functionality import  print_debug, wait, read_yaml, make_dir
+    VALID_CHANNELS, META_FILE, DEFAULT_JOB_OUTPUT_DIR, DEFAULT_JOB_QUEUE_DIR
+from core.correctness.validation import check_type, valid_list, valid_dir_path
 from core.meow import BaseHandler, BaseMonitor, BaseConductor
+from functionality.debug import setup_debugging, print_debug
+from functionality.file_io import make_dir, read_yaml
+from functionality.process_io import wait


 class MeowRunner: