added support for directory event matching

This commit is contained in:
PatchOfScotland
2023-03-31 13:51:14 +02:00
parent 14dec78756
commit 5952b02be4
10 changed files with 253 additions and 50 deletions

View File

@ -237,6 +237,27 @@ def valid_existing_file_path(variable:str, allow_base:bool=False,
msg = f"Requested file '{variable}' is not a file."
raise ValueError(msg)
def valid_existing_dir_path(variable:str, allow_base:bool=False, 
        extension:str="", hint:str=""):
    """Check the given string is a path to a directory that already exists.

    The string is first passed through valid_path, using the given
    'allow_base', 'extension' and 'hint' arguments. A FileNotFoundError is
    raised if nothing exists at the path, and a ValueError is raised if
    something exists there but it is not a directory. If a 'hint' is given
    it is included in the error message to help locate the problem."""
    # Check that the string is a path
    valid_path(variable, allow_base=allow_base, extension=extension, hint=hint)

    # Optional fragment naming where the bad path came from
    location = f" in '{hint}'" if hint else ""

    # Check the path exists
    if not exists(variable):
        raise FileNotFoundError(
            f"Requested dir path '{variable}'{location} does not exist."
        )

    # Check it is a dir
    if not isdir(variable):
        raise ValueError(
            f"Requested dir '{variable}'{location} is not a dir."
        )
def valid_dir_path(variable:str, must_exist:bool=False, allow_base:bool=False,
hint:str="")->None:
"""Check the given string is a valid directory path, either to an existing

View File

@ -5,12 +5,15 @@ Author(s): David Marchant
"""
from hashlib import sha256
from os import listdir
from os.path import isfile
from meow_base.core.correctness.vars import HASH_BUFFER_SIZE, SHA256
from meow_base.core.correctness.validation import check_type, \
valid_existing_file_path
valid_existing_file_path, valid_existing_dir_path
def _get_file_sha256(file_path):
def _get_file_sha256(file_path:str)->str:
sha256_hash = sha256()
with open(file_path, 'rb') as file_to_hash:
@ -22,7 +25,16 @@ def _get_file_sha256(file_path):
return sha256_hash.hexdigest()
def get_file_hash(file_path:str, hash:str, hint:str=""):
# TODO update this to be a bit more robust
def _get_dir_sha256(dir_path:str)->str:
    """Get a SHA256 hex digest summarising the entries of a directory.

    Only the names of the entries directly inside 'dir_path' are hashed. The
    contents of those entries are never read, so the digest changes when an
    entry is added, removed or renamed, but not when an existing file is
    rewritten in place.

    Returns the digest as a hexadecimal string. Any error raised by listdir
    (for example if 'dir_path' does not exist) is propagated unchanged."""
    sha256_hash = sha256()

    # listdir returns names in arbitrary order, so sort them to guarantee 
    # that the same set of entries always produces the same digest.
    buffer = str(sorted(listdir(dir_path))).encode()
    sha256_hash.update(buffer)

    return sha256_hash.hexdigest()
def get_file_hash(file_path:str, hash:str, hint:str="")->str:
check_type(hash, str, hint=hint)
valid_existing_file_path(file_path)
@ -35,3 +47,24 @@ def get_file_hash(file_path:str, hash:str, hint:str=""):
f"'{list(valid_hashes.keys())}")
return valid_hashes[hash](file_path)
# TODO inspect this a bit more fully
def get_dir_hash(file_path:str, hash:str, hint:str="")->str:
    """Get the hash of the directory at 'file_path', using the hashing 
    algorithm named by 'hash'.

    'hash' must be a string and 'file_path' must be an existing directory, 
    with 'hint' being passed to both checks so that it can be included in 
    any validation error. A KeyError is raised if 'hash' does not name a 
    supported algorithm. Currently only SHA256 is supported."""
    check_type(hash, str, hint=hint)

    # Forward the hint so a bad path is reported with its origin
    valid_existing_dir_path(file_path, hint=hint)

    # Dispatch table of supported algorithm names to their implementations
    valid_hashes = {
        SHA256: _get_dir_sha256
    }
    if hash not in valid_hashes:
        raise KeyError(f"Cannot use hash '{hash}'. Valid are "
            f"'{list(valid_hashes.keys())}'")

    return valid_hashes[hash](file_path)
def get_hash(path:str, hash:str, hint:str="")->str:
    """Get the hash of whatever is at 'path', using the algorithm named by 
    'hash'.

    Paths for which isfile is true are hashed with get_file_hash. Every 
    other path, including one that does not exist, is handed to 
    get_dir_hash. 'hint' is passed through to whichever is used."""
    # Pick the hashing function once, then make a single call
    hasher = get_file_hash if isfile(path) else get_dir_hash
    return hasher(path, hash, hint=hint)

View File

@ -26,10 +26,11 @@ from meow_base.core.correctness.validation import check_type, valid_string, \
valid_dict, valid_list, valid_dir_path
from meow_base.core.correctness.vars import VALID_RECIPE_NAME_CHARS, \
VALID_VARIABLE_NAME_CHARS, FILE_EVENTS, FILE_CREATE_EVENT, \
FILE_MODIFY_EVENT, FILE_MOVED_EVENT, DEBUG_INFO, \
FILE_RETROACTIVE_EVENT, SHA256, VALID_PATH_CHARS, FILE_CLOSED_EVENT
FILE_MODIFY_EVENT, FILE_MOVED_EVENT, DEBUG_INFO, DIR_EVENTS, \
FILE_RETROACTIVE_EVENT, SHA256, VALID_PATH_CHARS, FILE_CLOSED_EVENT, \
DIR_RETROACTIVE_EVENT
from meow_base.functionality.debug import setup_debugging, print_debug
from meow_base.functionality.hashing import get_file_hash
from meow_base.functionality.hashing import get_hash
from meow_base.functionality.meow import create_rule, create_watchdog_event
# Events that are monitored by default
@ -140,9 +141,9 @@ class FileEventPattern(BasePattern):
hint="FileEventPattern.event_mask"
)
for mask in event_mask:
if mask not in FILE_EVENTS:
if mask not in FILE_EVENTS + DIR_EVENTS:
raise ValueError(f"Invalid event mask '{mask}'. Valid are: "
f"{FILE_EVENTS}")
f"{FILE_EVENTS + DIR_EVENTS}")
def _is_valid_sweep(self, sweep: Dict[str,Union[int,float,complex]]) -> None:
"""Validation check for 'sweep' variable from main constructor."""
@ -249,7 +250,7 @@ class WatchdogMonitor(BaseMonitor):
event.src_path,
rule,
self.base_dir,
get_file_hash(event.src_path, SHA256)
get_hash(event.src_path, SHA256)
)
print_debug(self._print_target, self.debug_level,
f"Event at {src_path} hit rule {rule.name}",
@ -536,7 +537,8 @@ class WatchdogMonitor(BaseMonitor):
self._rules_lock.release()
return
if FILE_RETROACTIVE_EVENT in rule.pattern.event_mask:
if FILE_RETROACTIVE_EVENT in rule.pattern.event_mask \
or DIR_RETROACTIVE_EVENT in rule.pattern.event_mask:
# Determine what paths are potentially triggerable and gather
# files at those paths
testing_path = os.path.join(
@ -551,7 +553,7 @@ class WatchdogMonitor(BaseMonitor):
globble,
rule,
self.base_dir,
get_file_hash(globble, SHA256)
get_hash(globble, SHA256)
)
print_debug(self._print_target, self.debug_level,
f"Retroactive event for file at at {globble} hit rule "

View File

@ -209,7 +209,7 @@ def papermill_job_func(job_dir):
JOB_ERROR, STATUS_FAILED, get_job_file, \
get_result_file
from meow_base.functionality.file_io import read_yaml, write_notebook, write_yaml
from meow_base.functionality.hashing import get_file_hash
from meow_base.functionality.hashing import get_hash
from meow_base.functionality.parameterisation import parameterize_jupyter_notebook
@ -229,7 +229,7 @@ def papermill_job_func(job_dir):
# triggering event
if JOB_EVENT in job and WATCHDOG_HASH in job[JOB_EVENT]:
# get current hash
triggerfile_hash = get_file_hash(job[JOB_EVENT][EVENT_PATH], SHA256)
triggerfile_hash = get_hash(job[JOB_EVENT][EVENT_PATH], SHA256)
# If hash doesn't match, then abort the job. If its been modified, then
# another job will have been scheduled anyway.
if not triggerfile_hash \

View File

@ -185,7 +185,7 @@ def python_job_func(job_dir):
JOB_ERROR, STATUS_FAILED, get_base_file, \
get_job_file, get_result_file
from meow_base.functionality.file_io import read_yaml, write_yaml
from meow_base.functionality.hashing import get_file_hash
from meow_base.functionality.hashing import get_hash
from meow_base.functionality.parameterisation import parameterize_python_script
# Identify job files
@ -204,7 +204,7 @@ def python_job_func(job_dir):
# triggering event
if JOB_EVENT in job and WATCHDOG_HASH in job[JOB_EVENT]:
# get current hash
triggerfile_hash = get_file_hash(job[JOB_EVENT][EVENT_PATH], SHA256)
triggerfile_hash = get_hash(job[JOB_EVENT][EVENT_PATH], SHA256)
# If hash doesn't match, then abort the job. If its been modified, then
# another job will have been scheduled anyway.
if not triggerfile_hash \

View File

@ -1392,7 +1392,11 @@ GENERATE_PYTHON_SCRIPT = [
" del dataset"
]
# Source lines of a small Python script, one list element per line. The 
# script lists the directory named by its 'dir_to_count' variable and prints
# how many entries it contains. Note that os.listdir counts every entry, so 
# sub-directories are included in the reported number of 'files'.
# 'dir_to_count' is set to '.' here; presumably it is meant to be overridden 
# with the triggering directory when a job is parameterised - verify against 
# the tests that use this script.
COUNTING_PYTHON_SCRIPT = [
    "import os",
    "",
    "dir_to_count = '.'",
    "",
    "print(f'There are {len(os.listdir(dir_to_count))} files in the directory.')"
]
valid_pattern_one = FileEventPattern(

View File

@ -16,7 +16,7 @@ from meow_base.core.correctness.vars import JOB_TYPE_PYTHON, SHA256, \
from meow_base.conductors import LocalPythonConductor, LocalBashConductor
from meow_base.functionality.file_io import read_file, read_yaml, write_file, \
write_notebook, write_yaml, lines_to_string, make_dir
from meow_base.functionality.hashing import get_file_hash
from meow_base.functionality.hashing import get_hash
from meow_base.functionality.meow import create_watchdog_event, create_job, \
create_rule
from meow_base.functionality.parameterisation import parameterize_bash_script
@ -69,7 +69,7 @@ class PythonTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("150")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",
@ -156,7 +156,7 @@ class PythonTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("Data")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",
@ -243,7 +243,7 @@ class PythonTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("Data")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",
@ -366,7 +366,7 @@ class PythonTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("Data")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",
@ -431,7 +431,7 @@ class PythonTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("Data")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",
@ -502,7 +502,7 @@ class PythonTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("Data")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",
@ -570,7 +570,7 @@ class PythonTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("Data")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",
@ -757,7 +757,7 @@ class BashTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("150")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",
@ -847,7 +847,7 @@ class BashTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("150")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",
@ -913,7 +913,7 @@ class BashTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("150")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",
@ -997,7 +997,7 @@ class BashTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("150")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",
@ -1066,7 +1066,7 @@ class BashTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("150")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",

View File

@ -22,7 +22,7 @@ from meow_base.functionality.debug import setup_debugging
from meow_base.functionality.file_io import lines_to_string, make_dir, \
read_file, read_file_lines, read_notebook, read_yaml, rmtree, write_file, \
write_notebook, write_yaml
from meow_base.functionality.hashing import get_file_hash
from meow_base.functionality.hashing import get_hash
from meow_base.functionality.meow import KEYWORD_BASE, KEYWORD_DIR, \
KEYWORD_EXTENSION, KEYWORD_FILENAME, KEYWORD_JOB, KEYWORD_PATH, \
KEYWORD_PREFIX, KEYWORD_REL_DIR, KEYWORD_REL_PATH, \
@ -340,7 +340,7 @@ class HashingTests(unittest.TestCase):
super().tearDown()
teardown()
# Test that get_file_hash produces the expected hash
# Test that get_hash produces the expected hash
def testGetFileHashSha256(self)->None:
file_path = os.path.join(TEST_MONITOR_BASE, "hased_file.txt")
with open(file_path, 'w') as hashed_file:
@ -348,15 +348,15 @@ class HashingTests(unittest.TestCase):
expected_hash = \
"8557122088c994ba8aa5540ccbb9a3d2d8ae2887046c2db23d65f40ae63abade"
hash = get_file_hash(file_path, SHA256)
hash = get_hash(file_path, SHA256)
self.assertEqual(hash, expected_hash)
# Test that get_file_hash raises on a missing file
# Test that get_hash raises on a missing file
def testGetFileHashSha256NoFile(self)->None:
file_path = os.path.join(TEST_MONITOR_BASE, "file.txt")
with self.assertRaises(FileNotFoundError):
get_file_hash(file_path, SHA256)
get_hash(file_path, SHA256)
class MeowTests(unittest.TestCase):

View File

@ -4,15 +4,18 @@ import os
import unittest
from multiprocessing import Pipe
from time import sleep
from meow_base.core.correctness.vars import FILE_CREATE_EVENT, EVENT_TYPE, \
EVENT_RULE, WATCHDOG_BASE, EVENT_TYPE_WATCHDOG, EVENT_PATH, SWEEP_START, \
SWEEP_JUMP, SWEEP_STOP
SWEEP_JUMP, SWEEP_STOP, DIR_EVENTS
from meow_base.functionality.file_io import make_dir
from meow_base.patterns.file_event_pattern import FileEventPattern, \
WatchdogMonitor, _DEFAULT_MASK
from meow_base.recipes.jupyter_notebook_recipe import JupyterNotebookRecipe
from shared import BAREBONES_NOTEBOOK, TEST_MONITOR_BASE, setup, teardown
from meow_base.recipes.python_recipe import PythonRecipe
from shared import BAREBONES_NOTEBOOK, TEST_MONITOR_BASE, \
COUNTING_PYTHON_SCRIPT, setup, teardown
def patterns_equal(tester, pattern_one, pattern_two):
@ -322,6 +325,77 @@ class WatchdogMonitorTests(unittest.TestCase):
wm.stop()
# Test WatchdogMonitor identifies directory content updates
def testMonitorDirectoryMonitoring(self)->None:
    # A single pattern watching the 'top' directory for directory events
    pattern_one = FileEventPattern(
        "pattern_one", 
        os.path.join("top"), 
        "recipe_one", 
        "dir_to_count", 
        parameters={},
        event_mask=DIR_EVENTS
    )
    recipe = PythonRecipe(
        "recipe_one", COUNTING_PYTHON_SCRIPT)

    patterns = {
        pattern_one.name: pattern_one,
    }
    recipes = {
        recipe.name: recipe,
    }

    wm = WatchdogMonitor(
        TEST_MONITOR_BASE,
        patterns,
        recipes,
        settletime=3
    )

    # Only one pattern/recipe pair was given, so take the first rule
    rules = wm.get_rules()
    rule = rules[list(rules.keys())[0]]

    # Capture whatever the monitor would send on to a runner
    from_monitor_reader, from_monitor_writer = Pipe()
    wm.to_runner = from_monitor_writer

    wm.start()

    # Always stop the monitor, even if an assertion fails, so that it is not 
    # left running into later tests.
    try:
        # Create the watched directory and fill it once monitoring has begun
        start_dir = os.path.join(TEST_MONITOR_BASE, "top")
        contents = 10
        make_dir(start_dir)
        for i in range(contents):
            with open(os.path.join(start_dir, f"{i}.txt"), "w") as f:
                f.write("-")
            sleep(1)

        self.assertTrue(os.path.isdir(start_dir))
        for i in range(contents):
            self.assertTrue(os.path.exists(
                os.path.join(start_dir, f"{i}.txt"))
            )

        # Drain the pipe, giving up once nothing arrives for 5 seconds
        messages = []
        while from_monitor_reader.poll(5):
            messages.append(from_monitor_reader.recv())

        # All of the directory updates should produce exactly one event
        self.assertEqual(len(messages), 1)
        message = messages[0]

        self.assertEqual(type(message), dict)
        self.assertIn(EVENT_TYPE, message)
        self.assertEqual(message[EVENT_TYPE], EVENT_TYPE_WATCHDOG)
        self.assertIn(WATCHDOG_BASE, message)
        self.assertEqual(message[WATCHDOG_BASE], TEST_MONITOR_BASE)
        self.assertIn(EVENT_PATH, message)
        self.assertEqual(message[EVENT_PATH], start_dir)
        self.assertIn(EVENT_RULE, message)
        self.assertEqual(message[EVENT_RULE].name, rule.name)
    finally:
        wm.stop()
# Test WatchdogMonitor identifies fake events for retroactive patterns
def testMonitoringRetroActive(self)->None:
pattern_one = FileEventPattern(
@ -389,6 +463,76 @@ class WatchdogMonitorTests(unittest.TestCase):
wm.stop()
# Test WatchdogMonitor identifies events for retroactive directory patterns
def testMonitorRetroActiveDirectory(self)->None:
    # Create and fill the directory before any monitor exists, so that it 
    # can only be picked up retroactively.
    contents = 10
    start_dir = os.path.join(TEST_MONITOR_BASE, "top")
    make_dir(start_dir)
    for i in range(contents):
        with open(os.path.join(start_dir, f"{i}.txt"), "w") as f:
            f.write("-")
        sleep(1)

    self.assertTrue(os.path.isdir(start_dir))
    for i in range(contents):
        self.assertTrue(os.path.exists(
            os.path.join(start_dir, f"{i}.txt"))
        )

    # A single pattern watching the 'top' directory for directory events
    pattern_one = FileEventPattern(
        "pattern_one", 
        os.path.join("top"), 
        "recipe_one", 
        "dir_to_count", 
        parameters={},
        event_mask=DIR_EVENTS
    )
    recipe = PythonRecipe(
        "recipe_one", COUNTING_PYTHON_SCRIPT)

    patterns = {
        pattern_one.name: pattern_one,
    }
    recipes = {
        recipe.name: recipe,
    }

    wm = WatchdogMonitor(
        TEST_MONITOR_BASE,
        patterns,
        recipes,
        settletime=3
    )

    # Only one pattern/recipe pair was given, so take the first rule
    rules = wm.get_rules()
    rule = rules[list(rules.keys())[0]]

    # Capture whatever the monitor would send on to a runner
    from_monitor_reader, from_monitor_writer = Pipe()
    wm.to_runner = from_monitor_writer

    wm.start()

    # Always stop the monitor, even if an assertion fails, so that it is not 
    # left running into later tests.
    try:
        # Drain the pipe, giving up once nothing arrives for 5 seconds
        messages = []
        while from_monitor_reader.poll(5):
            messages.append(from_monitor_reader.recv())

        # The pre-existing directory should produce exactly one event
        self.assertEqual(len(messages), 1)
        message = messages[0]

        self.assertEqual(type(message), dict)
        self.assertIn(EVENT_TYPE, message)
        self.assertEqual(message[EVENT_TYPE], EVENT_TYPE_WATCHDOG)
        self.assertIn(WATCHDOG_BASE, message)
        self.assertEqual(message[WATCHDOG_BASE], TEST_MONITOR_BASE)
        self.assertIn(EVENT_PATH, message)
        self.assertEqual(message[EVENT_PATH], start_dir)
        self.assertIn(EVENT_RULE, message)
        self.assertEqual(message[EVENT_RULE].name, rule.name)
    finally:
        wm.stop()
# Test WatchdogMonitor get_patterns function
def testMonitorGetPatterns(self)->None:
pattern_one = FileEventPattern(
@ -790,4 +934,3 @@ class WatchdogMonitorTests(unittest.TestCase):
self.assertIsInstance(rules, dict)
self.assertEqual(len(rules), 2)

View File

@ -18,7 +18,7 @@ from meow_base.core.correctness.vars import EVENT_TYPE, WATCHDOG_BASE, \
from meow_base.core.rule import Rule
from meow_base.functionality.file_io import lines_to_string, make_dir, \
read_yaml, write_file, write_notebook, write_yaml
from meow_base.functionality.hashing import get_file_hash
from meow_base.functionality.hashing import get_hash
from meow_base.functionality.meow import create_job, create_rules, \
create_rule, create_watchdog_event
from meow_base.functionality.parameterisation import parameterize_bash_script
@ -173,7 +173,7 @@ class PapermillHandlerTests(unittest.TestCase):
EVENT_PATH: os.path.join(TEST_MONITOR_BASE, "A"),
WATCHDOG_BASE: TEST_MONITOR_BASE,
EVENT_RULE: rule,
WATCHDOG_HASH: get_file_hash(
WATCHDOG_HASH: get_hash(
os.path.join(TEST_MONITOR_BASE, "A"), SHA256
)
}
@ -224,7 +224,7 @@ class PapermillHandlerTests(unittest.TestCase):
EVENT_PATH: os.path.join(TEST_MONITOR_BASE, "A"),
WATCHDOG_BASE: TEST_MONITOR_BASE,
EVENT_RULE: rule,
WATCHDOG_HASH: get_file_hash(
WATCHDOG_HASH: get_hash(
os.path.join(TEST_MONITOR_BASE, "A"), SHA256
)
}
@ -294,7 +294,7 @@ class PapermillHandlerTests(unittest.TestCase):
EVENT_PATH: os.path.join(TEST_MONITOR_BASE, "A"),
WATCHDOG_BASE: TEST_MONITOR_BASE,
EVENT_RULE: rule,
WATCHDOG_HASH: get_file_hash(
WATCHDOG_HASH: get_hash(
os.path.join(TEST_MONITOR_BASE, "A"), SHA256
)
}
@ -346,7 +346,7 @@ class PapermillHandlerTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("Data")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",
@ -591,7 +591,7 @@ class PythonHandlerTests(unittest.TestCase):
EVENT_PATH: os.path.join(TEST_MONITOR_BASE, "A"),
WATCHDOG_BASE: TEST_MONITOR_BASE,
EVENT_RULE: rule,
WATCHDOG_HASH: get_file_hash(
WATCHDOG_HASH: get_hash(
os.path.join(TEST_MONITOR_BASE, "A"), SHA256
)
}
@ -642,7 +642,7 @@ class PythonHandlerTests(unittest.TestCase):
EVENT_PATH: os.path.join(TEST_MONITOR_BASE, "A"),
WATCHDOG_BASE: TEST_MONITOR_BASE,
EVENT_RULE: rule,
WATCHDOG_HASH: get_file_hash(
WATCHDOG_HASH: get_hash(
os.path.join(TEST_MONITOR_BASE, "A"), SHA256
)
}
@ -712,7 +712,7 @@ class PythonHandlerTests(unittest.TestCase):
EVENT_PATH: os.path.join(TEST_MONITOR_BASE, "A"),
WATCHDOG_BASE: TEST_MONITOR_BASE,
EVENT_RULE: rule,
WATCHDOG_HASH: get_file_hash(
WATCHDOG_HASH: get_hash(
os.path.join(TEST_MONITOR_BASE, "A"), SHA256
)
}
@ -764,7 +764,7 @@ class PythonHandlerTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("250")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",
@ -1004,7 +1004,7 @@ class BashHandlerTests(unittest.TestCase):
EVENT_PATH: os.path.join(TEST_MONITOR_BASE, "A"),
WATCHDOG_BASE: TEST_MONITOR_BASE,
EVENT_RULE: rule,
WATCHDOG_HASH: get_file_hash(
WATCHDOG_HASH: get_hash(
os.path.join(TEST_MONITOR_BASE, "A"), SHA256
)
}
@ -1055,7 +1055,7 @@ class BashHandlerTests(unittest.TestCase):
EVENT_PATH: os.path.join(TEST_MONITOR_BASE, "A"),
WATCHDOG_BASE: TEST_MONITOR_BASE,
EVENT_RULE: rule,
WATCHDOG_HASH: get_file_hash(
WATCHDOG_HASH: get_hash(
os.path.join(TEST_MONITOR_BASE, "A"), SHA256
)
}
@ -1125,7 +1125,7 @@ class BashHandlerTests(unittest.TestCase):
EVENT_PATH: os.path.join(TEST_MONITOR_BASE, "A"),
WATCHDOG_BASE: TEST_MONITOR_BASE,
EVENT_RULE: rule,
WATCHDOG_HASH: get_file_hash(
WATCHDOG_HASH: get_hash(
os.path.join(TEST_MONITOR_BASE, "A"), SHA256
)
}
@ -1177,7 +1177,7 @@ class BashHandlerTests(unittest.TestCase):
with open(file_path, "w") as f:
f.write("250")
file_hash = get_file_hash(file_path, SHA256)
file_hash = get_hash(file_path, SHA256)
pattern = FileEventPattern(
"pattern",