added parameter sweeps back in

This commit is contained in:
PatchOfScotland
2023-01-10 16:44:33 +01:00
parent 5dded8fc96
commit e9519d718f
9 changed files with 828 additions and 45 deletions

View File

@ -1,7 +1,7 @@
from inspect import signature
from os.path import sep
from typing import Any, _SpecialForm, Union, get_origin, get_args
from os.path import sep, exists, isfile, isdir, dirname
from typing import Any, _SpecialForm, Union, Tuple, get_origin, get_args
from core.correctness.vars import VALID_PATH_CHARS, get_not_imp_msg
@ -129,10 +129,52 @@ def valid_list(variable:list[Any], entry_type:type,
for entry in variable:
check_type(entry, entry_type, alt_types=alt_types)
def valid_path(variable:str, allow_base=False, extension:str="", min_length=1):
def valid_path(variable:str, allow_base:bool=False, extension:str="",
min_length:int=1):
valid_string(variable, VALID_PATH_CHARS, min_length=min_length)
if not allow_base and variable.startswith(sep):
raise ValueError(f"Cannot accept path '{variable}'. Must be relative.")
if extension and not variable.endswith(extension):
raise ValueError(f"Path '{variable}' does not have required "
f"extension '{extension}'.")
def valid_existing_file_path(variable:str, allow_base:bool=False,
extension:str=""):
valid_path(variable, allow_base=allow_base, extension=extension)
if not exists(variable):
raise FileNotFoundError(
f"Requested file path '{variable}' does not exist.")
if not isfile(variable):
raise ValueError(
f"Requested file '{variable}' is not a file.")
def valid_existing_dir_path(variable:str, allow_base:bool=False):
valid_path(variable, allow_base=allow_base, extension="")
if not exists(variable):
raise FileNotFoundError(
f"Requested dir path '{variable}' does not exist.")
if not isdir(variable):
raise ValueError(
f"Requested dir '{variable}' is not a directory.")
def valid_non_existing_path(variable:str, allow_base:bool=False):
valid_path(variable, allow_base=allow_base, extension="")
if exists(variable):
raise ValueError(f"Requested path '{variable}' already exists.")
if dirname(variable) and not exists(dirname(variable)):
raise ValueError(
f"Route to requested path '{variable}' does not exist.")
def setup_debugging(print:Any=None, logging:int=0)->Tuple[Any,int]:
check_type(logging, int)
if print is None:
return None, 0
else:
if not isinstance(print, object):
raise TypeError(f"Invalid print location provided")
writeable = getattr(print, "write", None)
if not writeable or not callable(writeable):
raise TypeError(f"Print object does not implement required "
"'write' function")
return print, logging

View File

@ -7,6 +7,7 @@ from inspect import signature
from typing import Union
# validation
CHAR_LOWERCASE = 'abcdefghijklmnopqrstuvwxyz'
CHAR_UPPERCASE = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
CHAR_NUMERIC = '0123456789'
@ -26,13 +27,167 @@ VALID_TRIGGERING_PATH_CHARS = VALID_NAME_CHARS + ".*" + os.path.sep
VALID_CHANNELS = Union[Connection,Queue]
# hashing
HASH_BUFFER_SIZE = 65536
SHA256 = "sha256"
# testing
BAREBONES_NOTEBOOK = {
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 4
}
TEST_MONITOR_BASE = "test_monitor_base"
TEST_HANDLER_BASE = "test_handler_base"
TEST_JOB_OUTPUT = "test_job_output"
COMPLETE_NOTEBOOK = {
"cells": [
{
"cell_type": "code",
"execution_count": None,
"metadata": {},
"outputs": [],
"source": "# The first cell\n\ns = 0\nnum = 1000"
},
{
"cell_type": "code",
"execution_count": None,
"metadata": {},
"outputs": [],
"source": "for i in range(num):\n s += i"
},
{
"cell_type": "code",
"execution_count": None,
"metadata": {},
"outputs": [],
"source": "div_by = 4"
},
{
"cell_type": "code",
"execution_count": None,
"metadata": {},
"outputs": [],
"source": "result = s / div_by"
},
{
"cell_type": "code",
"execution_count": None,
"metadata": {},
"outputs": [],
"source": "print(result)"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
APPENDING_NOTEBOOK = {
"cells": [
{
"cell_type": "code",
"execution_count": None,
"metadata": {},
"outputs": [],
"source": [
"# Default parameters values\n",
"# The line to append\n",
"extra = 'This line comes from a default pattern'\n",
"# Data input file location\n",
"infile = 'start/alpha.txt'\n",
"# Output file location\n",
"outfile = 'first/alpha.txt'"
]
},
{
"cell_type": "code",
"execution_count": None,
"metadata": {},
"outputs": [],
"source": [
"# load in dataset. This should be a text file\n",
"with open(infile) as input_file:\n",
" data = input_file.read()"
]
},
{
"cell_type": "code",
"execution_count": None,
"metadata": {},
"outputs": [],
"source": [
"# Append the line\n",
"appended = data + '\\n' + extra"
]
},
{
"cell_type": "code",
"execution_count": None,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Create output directory if it doesn't exist\n",
"output_dir_path = os.path.dirname(outfile)\n",
"\n",
"if output_dir_path:\n",
" os.makedirs(output_dir_path, exist_ok=True)\n",
"\n",
"# Save added array as new dataset\n",
"with open(outfile, 'w') as output_file:\n",
" output_file.write(appended)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6 (main, Nov 14 2022, 16:10:14) [GCC 11.3.0]"
},
"vscode": {
"interpreter": {
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}
# events
FILE_CREATE_EVENT = "file_created"
FILE_MODIFY_EVENT = "file_modified"
FILE_MOVED_EVENT = "file_moved"
@ -57,9 +212,12 @@ DIR_EVENTS = [
DIR_DELETED_EVENT
]
PIPE_READ = 0
PIPE_WRITE = 1
# debug printing levels
DEBUG_ERROR = 1
DEBUG_WARNING = 2
DEBUG_INFO = 3
# debug message functions
def get_drt_imp_msg(base_class):
return f"{base_class.__name__} may not be instantiated directly. " \
f"Implement a child class."

View File

@ -1,16 +1,25 @@
import sys
import copy
import hashlib
import inspect
import json
import nbformat
import os
import sys
import yaml
from multiprocessing.connection import Connection, wait as multi_wait
from multiprocessing.queues import Queue
from typing import Union
from papermill.translators import papermill_translators
from typing import Any, Union
from random import SystemRandom
from core.meow import BasePattern, BaseRecipe, BaseRule
from core.correctness.validation import check_type, valid_dict, valid_list
from core.correctness.validation import check_type, valid_dict, valid_list, \
valid_existing_file_path, valid_path
from core.correctness.vars import CHAR_LOWERCASE, CHAR_UPPERCASE, \
VALID_CHANNELS
VALID_CHANNELS, HASH_BUFFER_SIZE, SHA256, DEBUG_WARNING, DEBUG_ERROR, \
DEBUG_INFO
def check_pattern_dict(patterns, min_length=1):
valid_dict(patterns, str, BasePattern, strict=False, min_length=min_length)
@ -87,3 +96,203 @@ def wait(inputs:list[VALID_CHANNELS])->list[VALID_CHANNELS]:
(type(i) is Connection and i in ready) \
or (type(i) is Queue and i._reader in ready)]
return ready_inputs
def _get_file_sha256(file_path):
sha256_hash = hashlib.sha256()
with open(file_path, 'rb') as file_to_hash:
while True:
buffer = file_to_hash.read(HASH_BUFFER_SIZE)
if not buffer:
break
sha256_hash.update(buffer)
return sha256_hash.hexdigest()
def get_file_hash(file_path:str, hash:str):
check_type(hash, str)
import os
valid_existing_file_path(file_path)
valid_hashes = {
SHA256: _get_file_sha256
}
if hash not in valid_hashes:
raise KeyError(f"Cannot use hash '{hash}'. Valid are "
"'{list(valid_hashes.keys())}")
return valid_hashes[hash](file_path)
def rmtree(directory:str):
"""
Remove a directory and all its contents.
Should be faster than shutil.rmtree
:param: (str) The firectory to empty and remove
:return: No return
"""
for root, dirs, files in os.walk(directory, topdown=False):
for file in files:
os.remove(os.path.join(root, file))
for dir in dirs:
rmtree(os.path.join(root, dir))
os.rmdir(directory)
def make_dir(path:str, can_exist:bool=True, ensure_clean:bool=False):
"""
Creates a new directory at the given path.
:param path: (str) The directory path.
:param can_exist: (boolean) [optional] A toggle for if a previously
existing directory at the path will throw an error or not. Default is
true (e.g. no error is thrown if the path already exists)
:param ensure_clean: (boolean) [optional] A toggle for if a previously
existing directory at the path will be replaced with a new emtpy directory.
Default is False.
:return: No return
"""
if not os.path.exists(path):
os.mkdir(path)
elif os.path.isfile(path):
raise ValueError('Cannot make directory in %s as it already '
'exists and is a file' % path)
else:
if not can_exist:
if ensure_clean:
rmtree(path)
os.mkdir(path)
else:
raise ValueError("Directory %s already exists. " % path)
def read_yaml(filepath:str):
"""
Reads a file path as a yaml object.
:param filepath: (str) The file to read.
:return: (object) An object read from the file.
"""
with open(filepath, 'r') as yaml_file:
return yaml.load(yaml_file, Loader=yaml.Loader)
def write_yaml(source:Any, filename:str, mode:str='w'):
"""
Writes a given objcet to a yaml file.
:param source: (any) A python object to be written.
:param filename: (str) The filename to be written to.
:return: No return
"""
with open(filename, mode) as param_file:
yaml.dump(source, param_file, default_flow_style=False)
def read_notebook(filepath:str):
valid_path(filepath, extension="ipynb")
with open(filepath, 'r') as read_file:
return json.load(read_file)
def write_notebook(source:dict[str,Any], filename:str):
"""
Writes the given notebook source code to a given filename.
:param source: (dict) The notebook source dictionary.
:param filename: (str) The filename to write to.
:return: No return
"""
with open(filename, 'w') as job_file:
json.dump(source, job_file)
# Adapted from: https://github.com/rasmunk/notebook_parameterizer
def parameterize_jupyter_notebook( jupyter_notebook:dict[str,Any],
parameters:dict[str,Any], expand_env_values:bool=False)->dict[str,Any]:
nbformat.validate(jupyter_notebook)
check_type(parameters, dict)
if jupyter_notebook["nbformat"] != 4:
raise Warning(
"Parameterization designed to work with nbformat version 4. "
f"Differing version of '{jupyter_notebook['nbformat']}' may "
"produce unexpeted results.")
# Load input notebook
if "kernelspec" in jupyter_notebook["metadata"]:
kernel_name = jupyter_notebook["metadata"]["kernelspec"]["name"]
language = jupyter_notebook["metadata"]["kernelspec"]["language"]
if "language_info" in jupyter_notebook["metadata"]:
kernel_name = jupyter_notebook["metadata"]["language_info"]["name"]
language = jupyter_notebook["metadata"]["language_info"]["name"]
else:
raise AttributeError(
f"Notebook lacks key language and/or kernel_name attributes "
"within metadata")
translator = papermill_translators.find_translator(kernel_name, language)
output_notebook = copy.deepcopy(jupyter_notebook)
# Find each
cells = output_notebook["cells"]
code_cells = [
(idx, cell) for idx, cell in enumerate(cells) \
if cell["cell_type"] == "code"
]
for idx, cell in code_cells:
cell_updated = False
source = cell["source"]
# Either single string or a list of strings
if isinstance(source, str):
lines = source.split("\n")
else:
lines = source
for idy, line in enumerate(lines):
if "=" in line:
d_line = list(map(lambda x: x.replace(" ", ""),
line.split("=")))
# Matching parameter name
if len(d_line) == 2 and d_line[0] in parameters:
value = parameters[d_line[0]]
# Whether to expand value from os env
if (
expand_env_values
and isinstance(value, str)
and value.startswith("ENV_")
):
env_var = value.replace("ENV_", "")
value = os.getenv(
env_var,
"MISSING ENVIRONMENT VARIABLE: {}".format(env_var)
)
lines[idy] = translator.assign(
d_line[0], translator.translate(value)
)
cell_updated = True
if cell_updated:
cells[idx]["source"] = "\n".join(lines)
# Validate that the parameterized notebook is still valid
nbformat.validate(output_notebook, version=4)
return output_notebook
def print_debug(print_target, debug_level, msg, level)->None:
if print_target is None:
return
else:
if level <= debug_level:
status = "ERROR"
if level == DEBUG_INFO:
status = "INFO"
elif level == DEBUG_WARNING:
status = "WARNING"
print(f"{status}: {msg}", file=print_target)

View File

@ -187,7 +187,6 @@ class BaseHandler:
pass
# TODO test me
class MeowRunner:
monitor:BaseMonitor
handler:BaseHandler
@ -199,9 +198,13 @@ class MeowRunner:
def start(self)->None:
self.monitor.start()
if hasattr(self.handler, "start"):
self.handler.start()
def stop(self)->None:
self.monitor.stop()
if hasattr(self.handler, "stop"):
self.handler.stop()
def _is_valid_monitor(self, monitor:BaseMonitor)->None:
check_type(monitor, BaseMonitor)