added comments throughout

PatchOfScotland
2023-01-31 14:36:38 +01:00
parent 31d06af5bf
commit b95042c5ca
10 changed files with 545 additions and 72 deletions


@@ -1,4 +1,10 @@
"""
This file contains definitions for a MEOW pattern based on file events,
along with an appropriate monitor for said events.
Author(s): David Marchant
"""
import glob
import threading
import sys
@@ -23,6 +29,7 @@ from core.functionality import print_debug, create_event, get_file_hash
from core.meow import BasePattern, BaseMonitor, BaseRule, BaseRecipe, \
create_rule
# Events that are monitored by default
_DEFAULT_MASK = [
FILE_CREATE_EVENT,
FILE_MODIFY_EVENT,
@@ -30,20 +37,28 @@ _DEFAULT_MASK = [
FILE_RETROACTIVE_EVENT
]
# Parameter sweep keys
SWEEP_START = "start"
SWEEP_STOP = "stop"
SWEEP_JUMP = "jump"
class FileEventPattern(BasePattern):
# The path at which events will trigger this pattern
triggering_path:str
# The variable name given to the triggering file within recipe code
triggering_file:str
# Which types of event the pattern responds to
event_mask:list[str]
# TODO move me to BasePattern definition
# A collection of variables to be swept over for job scheduling
sweep:dict[str,Any]
def __init__(self, name:str, triggering_path:str, recipe:str,
triggering_file:str, event_mask:list[str]=_DEFAULT_MASK,
parameters:dict[str,Any]={}, outputs:dict[str,Any]={},
sweep:dict[str,Any]={}):
"""FileEventPattern Constructor. This is used to match against file
system events, as caught by the python watchdog module."""
super().__init__(name, recipe, parameters, outputs)
self._is_valid_triggering_path(triggering_path)
self.triggering_path = triggering_path
@@ -54,10 +69,9 @@ class FileEventPattern(BasePattern):
self._is_valid_sweep(sweep)
self.sweep = sweep
def _is_valid_recipe(self, recipe:str)->None:
valid_string(recipe, VALID_RECIPE_NAME_CHARS)
def _is_valid_triggering_path(self, triggering_path:str)->None:
"""Validation check for 'triggering_path' variable from main
constructor."""
valid_path(triggering_path)
if len(triggering_path) < 1:
raise ValueError (
@@ -66,19 +80,31 @@ class FileEventPattern(BasePattern):
)
def _is_valid_triggering_file(self, triggering_file:str)->None:
"""Validation check for 'triggering_file' variable from main
constructor."""
valid_string(triggering_file, VALID_VARIABLE_NAME_CHARS)
def _is_valid_recipe(self, recipe:str)->None:
"""Validation check for 'recipe' variable from main constructor.
Called within parent BasePattern constructor."""
valid_string(recipe, VALID_RECIPE_NAME_CHARS)
def _is_valid_parameters(self, parameters:dict[str,Any])->None:
"""Validation check for 'parameters' variable from main constructor.
Called within parent BasePattern constructor."""
valid_dict(parameters, str, Any, strict=False, min_length=0)
for k in parameters.keys():
valid_string(k, VALID_VARIABLE_NAME_CHARS)
def _is_valid_output(self, outputs:dict[str,str])->None:
"""Validation check for 'output' variable from main constructor.
Called within parent BasePattern constructor."""
valid_dict(outputs, str, str, strict=False, min_length=0)
for k in outputs.keys():
valid_string(k, VALID_VARIABLE_NAME_CHARS)
def _is_valid_event_mask(self, event_mask)->None:
"""Validation check for 'event_mask' variable from main constructor."""
valid_list(event_mask, str, min_length=1)
for mask in event_mask:
if mask not in FILE_EVENTS:
@@ -86,6 +112,7 @@ class FileEventPattern(BasePattern):
f"{FILE_EVENTS}")
def _is_valid_sweep(self, sweep)->None:
"""Validation check for 'sweep' variable from main constructor."""
check_type(sweep, dict)
if not sweep:
return
@@ -109,30 +136,42 @@ class FileEventPattern(BasePattern):
elif v[SWEEP_JUMP] > 0:
if not v[SWEEP_STOP] > v[SWEEP_START]:
raise ValueError(
"Cannot create sweep with a positive '{SWEEP_JUMP}' "
f"Cannot create sweep with a positive '{SWEEP_JUMP}' "
"value where the end point is smaller than the start."
)
elif v[SWEEP_JUMP] < 0:
if not v[SWEEP_STOP] < v[SWEEP_START]:
raise ValueError(
"Cannot create sweep with a negative '{SWEEP_JUMP}' "
f"Cannot create sweep with a negative '{SWEEP_JUMP}' "
"value where the end point is smaller than the start."
)
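For illustration, a minimal sketch of a pattern that would satisfy the sweep checks above; the pattern, recipe and variable names are invented for the example, and the sketch assumes the names defined in this file are importable.

    import os

    # 'analyse_recipe' and 'threshold' are illustrative names, not part of this file
    pattern = FileEventPattern(
        "txt_pattern",                      # pattern name
        os.path.join("input", "*.txt"),     # triggering_path, glob style
        "analyse_recipe",                   # recipe to schedule on a match
        "infile",                           # variable naming the triggering file
        event_mask=[FILE_CREATE_EVENT],     # only respond to file creation
        sweep={"threshold": {
            SWEEP_START: 0,                 # sweep from 0 towards 10 in steps of 2
            SWEEP_STOP: 10,
            SWEEP_JUMP: 2
        }}
    )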
class WatchdogMonitor(BaseMonitor):
# A handler object, to catch events
event_handler:PatternMatchingEventHandler
# The watchdog observer object
monitor:Observer
# The base monitored directory
base_dir:str
# Config option, above which debug messages are ignored
debug_level:int
# Where print messages are sent
_print_target:Any
# A lock to solve race conditions on '_patterns'
_patterns_lock:threading.Lock
# A lock to solve race conditions on '_recipes'
_recipes_lock:threading.Lock
# A lock to solve race conditions on '_rules'
_rules_lock:threading.Lock
def __init__(self, base_dir:str, patterns:dict[str,FileEventPattern],
recipes:dict[str,BaseRecipe], autostart=False, settletime:int=1,
print:Any=sys.stdout, logging:int=0)->None:
"""WatchdogEventHandler Constructor. This uses the watchdog module to
monitor a directory and all its sub-directories. Watchdog will provide
the monitor with any caught events, with the monitor comparing them
against its rules and informing the runner of any matches."""
super().__init__(patterns, recipes)
self._is_valid_base_dir(base_dir)
self.base_dir = base_dir
@@ -155,22 +194,28 @@ class WatchdogMonitor(BaseMonitor):
self.start()
def start(self)->None:
"""Function to start the monitor."""
print_debug(self._print_target, self.debug_level,
"Starting WatchdogMonitor", DEBUG_INFO)
self._apply_retroactive_rules()
self.monitor.start()
def stop(self)->None:
"""Function to stop the monitor."""
print_debug(self._print_target, self.debug_level,
"Stopping WatchdogMonitor", DEBUG_INFO)
self.monitor.stop()
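As a rough usage sketch (not part of this commit), a monitor might be constructed and run as follows; 'patterns' and 'recipes' are assumed to be dicts of FileEventPattern and BaseRecipe objects built elsewhere, and the directory name is invented.

    monitor = WatchdogMonitor(
        "runner_base",        # base_dir, must be an existing directory
        patterns,             # dict[str, FileEventPattern]
        recipes,              # dict[str, BaseRecipe]
        settletime=1,         # seconds used to filter bursts of events per path
        logging=0             # debug verbosity
    )
    monitor.start()           # applies retroactive rules, then begins observing
    # ... run until no longer needed ...
    monitor.stop()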
def match(self, event)->None:
"""Function to determine if a given event matches the current rules."""
src_path = event.src_path
event_type = "dir_"+ event.event_type if event.is_directory \
else "file_" + event.event_type
# Remove the base dir from the path as trigger paths are given relative
# to that
handle_path = src_path.replace(self.base_dir, '', 1)
# Also remove any leading slashes, so the path is not treated as starting at the root directory
while handle_path.startswith(os.path.sep):
handle_path = handle_path[1:]
@@ -178,15 +223,18 @@ class WatchdogMonitor(BaseMonitor):
try:
for rule in self._rules.values():
# Skip events not within the event mask
if event_type not in rule.pattern.event_mask:
continue
# Use regex to match event paths against rule paths
target_path = rule.pattern.triggering_path
recursive_regexp = translate(target_path)
direct_regexp = recursive_regexp.replace('.*', '[^/]*')
recursive_hit = match(recursive_regexp, handle_path)
direct_hit = match(direct_regexp, handle_path)
# If matched, then create a watchdog event
if direct_hit or recursive_hit:
meow_event = create_event(
WATCHDOG_TYPE,
@@ -203,6 +251,7 @@ class WatchdogMonitor(BaseMonitor):
print_debug(self._print_target, self.debug_level,
f"Event at {src_path} of type {event_type} hit rule "
f"{rule.name}", DEBUG_INFO)
# Send the event to the runner
self.to_runner.send(meow_event)
except Exception as e:
@@ -212,6 +261,9 @@ class WatchdogMonitor(BaseMonitor):
self._rules_lock.release()
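The recursive versus direct matching above can be seen in isolation with a small sketch that mirrors the same calls on invented paths, assuming translate comes from fnmatch and match from re, as the calls here suggest.

    from fnmatch import translate
    from re import match

    trigger = "input/*.txt"                                  # a pattern's triggering_path
    recursive_regexp = translate(trigger)                    # '*' may span directory separators
    direct_regexp = recursive_regexp.replace('.*', '[^/]*')  # '*' kept within one path component

    print(bool(match(direct_regexp, "input/data.txt")))         # True  - direct hit
    print(bool(match(recursive_regexp, "input/sub/data.txt")))  # True  - recursive hit
    print(bool(match(direct_regexp, "input/sub/data.txt")))     # False - not a direct hit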
def add_pattern(self, pattern:FileEventPattern)->None:
"""Function to add a pattern to the current definitions. Any rules
that can be possibly created from that pattern will be automatically
created."""
check_type(pattern, FileEventPattern)
self._patterns_lock.acquire()
try:
@@ -227,12 +279,16 @@ class WatchdogMonitor(BaseMonitor):
self._identify_new_rules(new_pattern=pattern)
def update_pattern(self, pattern:FileEventPattern)->None:
"""Function to update a pattern in the current definitions. Any rules
created from that pattern will be automatically updated."""
check_type(pattern, FileEventPattern)
self.remove_pattern(pattern.name)
print(f"adding pattern w/ recipe {pattern.recipe}")
self.add_pattern(pattern)
def remove_pattern(self, pattern: Union[str,FileEventPattern])->None:
"""Function to remove a pattern from the current definitions. Any rules
that will be no longer valid will be automatically removed."""
check_type(pattern, str, alt_types=[FileEventPattern])
lookup_key = pattern
if isinstance(lookup_key, FileEventPattern):
@@ -253,7 +309,10 @@ class WatchdogMonitor(BaseMonitor):
else:
self._identify_lost_rules(lost_pattern=pattern)
def get_patterns(self)->None:
def get_patterns(self)->dict[str,FileEventPattern]:
"""Function to get a dict of the currently defined patterns of the
monitor. Note that the result is deep-copied, and so can be manipulated
without directly manipulating the internals of the monitor."""
to_return = {}
self._patterns_lock.acquire()
try:
@@ -265,6 +324,9 @@ class WatchdogMonitor(BaseMonitor):
return to_return
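A brief, assumed usage of the pattern management functions above; 'monitor' and 'pattern' stand in for an existing WatchdogMonitor and FileEventPattern.

    monitor.add_pattern(pattern)          # rules are created for any matching recipe
    print(list(monitor.get_patterns()))   # deep-copied snapshot of pattern names

    monitor.update_pattern(pattern)       # implemented as a remove followed by an add
    monitor.remove_pattern(pattern)       # accepts the pattern object or its name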
def add_recipe(self, recipe: BaseRecipe)->None:
"""Function to add a recipe to the current definitions. Any rules
that can be possibly created from that recipe will be automatically
created."""
check_type(recipe, BaseRecipe)
self._recipes_lock.acquire()
try:
@@ -280,17 +342,22 @@ class WatchdogMonitor(BaseMonitor):
self._identify_new_rules(new_recipe=recipe)
def update_recipe(self, recipe: BaseRecipe)->None:
"""Function to update a recipe in the current definitions. Any rules
created from that recipe will be automatically updated."""
check_type(recipe, BaseRecipe)
self.remove_recipe(recipe.name)
self.add_recipe(recipe)
def remove_recipe(self, recipe:Union[str,BaseRecipe])->None:
"""Function to remove a recipe from the current definitions. Any rules
that will be no longer valid will be automatically removed."""
check_type(recipe, str, alt_types=[BaseRecipe])
lookup_key = recipe
if isinstance(lookup_key, BaseRecipe):
lookup_key = recipe.name
self._recipes_lock.acquire()
try:
# Check that recipe has not already been deleted
if lookup_key not in self._recipes:
raise KeyError(f"Cannot remote Recipe '{lookup_key}' as it "
"does not already exist")
@@ -305,7 +372,10 @@ class WatchdogMonitor(BaseMonitor):
else:
self._identify_lost_rules(lost_recipe=recipe)
def get_recipes(self)->None:
def get_recipes(self)->dict[str,BaseRecipe]:
"""Function to get a dict of the currently defined recipes of the
monitor. Note that the result is deep-copied, and so can be manipulated
without directly manipulating the internals of the monitor."""
to_return = {}
self._recipes_lock.acquire()
try:
@@ -316,7 +386,10 @@ class WatchdogMonitor(BaseMonitor):
self._recipes_lock.release()
return to_return
def get_rules(self)->None:
def get_rules(self)->dict[str,BaseRule]:
"""Function to get a dict of the currently defined rules of the
monitor. Note that the result is deep-copied, and so can be manipulated
without directly manipulating the internals of the monitor."""
to_return = {}
self._rules_lock.acquire()
try:
@@ -329,15 +402,20 @@ class WatchdogMonitor(BaseMonitor):
def _identify_new_rules(self, new_pattern:FileEventPattern=None,
new_recipe:BaseRecipe=None)->None:
"""Function to determine if a new rule can be created given a new
pattern or recipe, in light of other existing patterns or recipes in
the monitor."""
if new_pattern:
self._patterns_lock.acquire()
self._recipes_lock.acquire()
try:
# Check in case pattern has been deleted since function called
if new_pattern.name not in self._patterns:
self._patterns_lock.release()
self._recipes_lock.release()
return
# If pattern specifies recipe that already exists, make a rule
if new_pattern.recipe in self._recipes:
self._create_new_rule(
new_pattern,
@@ -354,10 +432,12 @@ class WatchdogMonitor(BaseMonitor):
self._patterns_lock.acquire()
self._recipes_lock.acquire()
try:
# Check in case recipe has been deleted since function called
if new_recipe.name not in self._recipes:
self._patterns_lock.release()
self._recipes_lock.release()
return
# If recipe is specified by existing pattern, make a rule
for pattern in self._patterns.values():
if pattern.recipe == new_recipe.name:
self._create_new_rule(
@@ -373,14 +453,18 @@ class WatchdogMonitor(BaseMonitor):
def _identify_lost_rules(self, lost_pattern:str=None,
lost_recipe:str=None)->None:
"""Function to remove rules that should be deleted in response to a
pattern or recipe having been deleted."""
to_delete = []
self._rules_lock.acquire()
try:
# Identify any offending rules
for name, rule in self._rules.items():
if lost_pattern and rule.pattern.name == lost_pattern:
to_delete.append(name)
if lost_recipe and rule.recipe.name == lost_recipe:
to_delete.append(name)
# Now delete them
for delete in to_delete:
if delete in self._rules.keys():
self._rules.pop(delete)
@@ -389,7 +473,12 @@ class WatchdogMonitor(BaseMonitor):
raise e
self._rules_lock.release()
def _create_new_rule(self, pattern:FileEventPattern, recipe:BaseRecipe)->None:
def _create_new_rule(self, pattern:FileEventPattern,
recipe:BaseRecipe)->None:
"""Function to create a new rule from a given pattern and recipe. This
will only be called to create rules at runtime, as rules are
automatically created at initialisation using the same 'create_rule'
function called here."""
rule = create_rule(pattern, recipe)
self._rules_lock.acquire()
try:
@@ -405,31 +494,45 @@ class WatchdogMonitor(BaseMonitor):
self._apply_retroactive_rule(rule)
def _is_valid_base_dir(self, base_dir:str)->None:
"""Validation check for 'base_dir' variable from main constructor. Is
automatically called during initialisation."""
valid_existing_dir_path(base_dir)
def _is_valid_patterns(self, patterns:dict[str,FileEventPattern])->None:
"""Validation check for 'patterns' variable from main constructor. Is
automatically called during initialisation."""
valid_dict(patterns, str, FileEventPattern, min_length=0, strict=False)
def _is_valid_recipes(self, recipes:dict[str,BaseRecipe])->None:
"""Validation check for 'recipes' variable from main constructor. Is
automatically called during initialisation."""
valid_dict(recipes, str, BaseRecipe, min_length=0, strict=False)
def _apply_retroactive_rules(self)->None:
"""Function to determine if any rules should be applied to the existing
file structure, as though it had only just been created or modified."""
for rule in self._rules.values():
self._apply_retroactive_rule(rule)
def _apply_retroactive_rule(self, rule:BaseRule)->None:
"""Function to determine if a rule should be applied to the existing
file structure, as though it had only just been created or modified."""
self._rules_lock.acquire()
try:
# Check in case the rule was deleted since this function was first called
if rule.name not in self._rules:
self._rules_lock.release()
return
if FILE_RETROACTIVE_EVENT in rule.pattern.event_mask:
# Determine what paths are potentially triggerable and gather
# files at those paths
testing_path = os.path.join(
self.base_dir, rule.pattern.triggering_path)
globbed = glob.glob(testing_path)
# For each file create a fake event.
for globble in globbed:
meow_event = create_event(
@@ -440,6 +543,7 @@ class WatchdogMonitor(BaseMonitor):
print_debug(self._print_target, self.debug_level,
f"Retroactive event for file at at {globble} hit rule "
f"{rule.name}", DEBUG_INFO)
# Send it to the runner
self.to_runner.send(meow_event)
except Exception as e:
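The globbing step above can be pictured with a standalone sketch; the directory and pattern here are invented for illustration.

    import glob
    import os

    base_dir = "runner_base"                           # assumed monitor base directory
    triggering_path = os.path.join("input", "*.txt")   # a pattern's triggering_path

    # Files already on disk for which a retroactive rule would raise events
    for existing in glob.glob(os.path.join(base_dir, triggering_path)):
        print(f"retroactive event would be created for {existing}")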
@@ -449,11 +553,19 @@
class WatchdogEventHandler(PatternMatchingEventHandler):
# The monitor class running this handler
monitor:WatchdogMonitor
# A time to wait per event path, during which extra events are discarded
_settletime:int
# TODO clean this struct occasionally
# A dict of recent job timestamps
_recent_jobs:dict[str, Any]
# A lock to solve race conditions on '_recent_jobs'
_recent_jobs_lock:threading.Lock
def __init__(self, monitor:WatchdogMonitor, settletime:int=1):
"""WatchdogEventHandler Constructor. This inherits from watchdog
PatternMatchingEventHandler, and is used to catch events, then filter
out excessive events at the same location."""
super().__init__()
self.monitor = monitor
self._settletime = settletime
@@ -461,12 +573,19 @@ class WatchdogEventHandler(PatternMatchingEventHandler):
self._recent_jobs_lock = threading.Lock()
def threaded_handler(self, event):
"""Function to determine if the given event shall be sent on to the
monitor. After each event we wait for '_settletime', to catch
subsequent events at the same location, so as to not swamp the system
with repeated events."""
self._recent_jobs_lock.acquire()
try:
if event.src_path in self._recent_jobs:
recent_timestamp = self._recent_jobs[event.src_path]
difference = event.time_stamp - recent_timestamp
# Discard the event if we already have a recent event at this
# same path. Update the most recent time, so we can hopefully
# wait until events have stopped happening.
if difference <= self._settletime:
self._recent_jobs[event.src_path] = \
max(recent_timestamp, event.time_stamp)
@@ -481,9 +600,14 @@ class WatchdogEventHandler(PatternMatchingEventHandler):
raise Exception(ex)
self._recent_jobs_lock.release()
# If we did not have a recent event, then send it on to the monitor
self.monitor.match(event)
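The settle-time filtering above amounts to the following standalone sketch; 'should_forward' is an invented helper name, not part of this module.

    from time import time

    settletime = 1      # seconds within which repeat events at a path are discarded
    recent_jobs = {}    # path -> timestamp of the most recent event seen there

    def should_forward(path, timestamp):
        # Forward only if no event was seen at this path within 'settletime'
        if path in recent_jobs and timestamp - recent_jobs[path] <= settletime:
            recent_jobs[path] = max(recent_jobs[path], timestamp)
            return False
        recent_jobs[path] = timestamp
        return True

    print(should_forward("output/data.txt", time()))   # True, first event at this path
    print(should_forward("output/data.txt", time()))   # False, discarded as a repeat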
def handle_event(self, event):
"""Handler function, called by all specific event functions. Will
attach a timestamp to the event immediately, and attempt to start a
threaded_handler so that the monitor can resume monitoring as soon as
possible."""
event.time_stamp = time()
waiting_for_threaded_resources = True
@@ -499,16 +623,21 @@ class WatchdogEventHandler(PatternMatchingEventHandler):
sleep(1)
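Most of the retry loop is hidden by this hunk, but its shape is roughly the following sketch; the function name is invented, while the use of threaded_handler as the thread target follows the handler defined above.

    import threading
    from time import sleep, time

    def handle_event_sketch(handler, event):
        # Stamp the event, then retry until a worker thread can be started
        event.time_stamp = time()
        while True:
            try:
                worker = threading.Thread(
                    target=handler.threaded_handler, args=(event,))
                worker.daemon = True
                worker.start()
                return
            except RuntimeError:
                # Thread resources exhausted; wait briefly and try again
                sleep(1)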
def on_created(self, event):
"""Function called when a file created event occurs."""
self.handle_event(event)
def on_modified(self, event):
"""Function called when a file modified event occurs."""
self.handle_event(event)
def on_moved(self, event):
"""Function called when a file moved event occurs."""
self.handle_event(event)
def on_deleted(self, event):
"""Function called when a file deleted event occurs."""
self.handle_event(event)
def on_closed(self, event):
"""Function called when a file closed event occurs."""
self.handle_event(event)