"""
This file contains definitions for a MEOW pattern based on file events,
along with an appropriate monitor for said events.

Author(s): David Marchant
"""
import glob
import threading
import sys
import os

from fnmatch import translate
from re import match
from time import time, sleep
from typing import Any, Union, Dict, List
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler

from meow_base.core.base_recipe import BaseRecipe
from meow_base.core.base_monitor import BaseMonitor
from meow_base.core.base_pattern import BasePattern
from meow_base.core.meow import EVENT_KEYS, valid_meow_dict
from meow_base.core.rule import Rule
from meow_base.functionality.validation import check_type, valid_string, \
    valid_dict, valid_list, valid_dir_path
from meow_base.core.vars import VALID_RECIPE_NAME_CHARS, \
    VALID_VARIABLE_NAME_CHARS, FILE_EVENTS, FILE_CREATE_EVENT, \
    FILE_MODIFY_EVENT, FILE_MOVED_EVENT, DEBUG_INFO, DIR_EVENTS, \
    FILE_RETROACTIVE_EVENT, SHA256, VALID_PATH_CHARS, FILE_CLOSED_EVENT, \
    DIR_RETROACTIVE_EVENT
from meow_base.functionality.debug import setup_debugging, print_debug
from meow_base.functionality.hashing import get_hash
from meow_base.functionality.meow import create_event

# Events that are monitored by default
_DEFAULT_MASK = [
    FILE_CREATE_EVENT,
    FILE_MODIFY_EVENT,
    FILE_MOVED_EVENT,
    FILE_RETROACTIVE_EVENT,
    FILE_CLOSED_EVENT
]

# watchdog events
EVENT_TYPE_WATCHDOG = "watchdog"
WATCHDOG_BASE = "monitor_base"
WATCHDOG_HASH = "file_hash"

WATCHDOG_EVENT_KEYS = {
    WATCHDOG_BASE: str,
    WATCHDOG_HASH: str,
    **EVENT_KEYS
}

def create_watchdog_event(path:str, rule:Any, base:str, time:float,
        hash:str, extras:Dict[Any,Any]={})->Dict[Any,Any]:
    """Function to create a MEOW event dictionary."""
    return create_event(
        EVENT_TYPE_WATCHDOG,
        path,
        rule,
        time,
        extras={
            **extras,
            **{
                WATCHDOG_HASH: hash,
                WATCHDOG_BASE: base
            }
        }
    )

def valid_watchdog_event(event:Dict[str,Any])->None:
    valid_meow_dict(event, "Watchdog event", WATCHDOG_EVENT_KEYS)

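# Illustrative sketch (not executed) of the two helpers above: building and
# validating a watchdog event by hand. 'some_rule' stands in for any Rule
# produced by a monitor's pattern/recipe matching, and the paths are assumed
# values for the example only.
#
#   event = create_watchdog_event(
#       "monitored_dir/input.txt",      # path of the triggering file
#       some_rule,                      # an existing meow_base Rule
#       "monitored_dir",                # the monitor's base directory
#       time(),                         # timestamp to attach to the event
#       get_hash("monitored_dir/input.txt", SHA256)
#   )
#   valid_watchdog_event(event)         # raises if the event is malformed
#
# The resulting dictionary holds the generic EVENT_KEYS entries plus the
# WATCHDOG_BASE and WATCHDOG_HASH entries defined above.
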
class FileEventPattern(BasePattern):
    # The path at which events will trigger this pattern
    triggering_path:str
    # The variable name given to the triggering file within recipe code
    triggering_file:str
    # Which types of event the pattern responds to
    event_mask:List[str]

    def __init__(self, name:str, triggering_path:str, recipe:str,
            triggering_file:str, event_mask:List[str]=_DEFAULT_MASK,
            parameters:Dict[str,Any]={}, outputs:Dict[str,Any]={},
            sweep:Dict[str,Any]={}):
        """FileEventPattern Constructor. This is used to match against file
        system events, as caught by the Python watchdog module."""
        super().__init__(name, recipe, parameters, outputs, sweep)
        self._is_valid_triggering_path(triggering_path)
        self.triggering_path = triggering_path
        self._is_valid_triggering_file(triggering_file)
        self.triggering_file = triggering_file
        self._is_valid_event_mask(event_mask)
        self.event_mask = event_mask

    def _is_valid_triggering_path(self, triggering_path:str)->None:
        """Validation check for 'triggering_path' variable from main
        constructor."""
        valid_string(
            triggering_path,
            VALID_PATH_CHARS+'*',
            min_length=1,
            hint="FileEventPattern.triggering_path"
        )
        if len(triggering_path) < 1:
            raise ValueError(
                f"triggering path '{triggering_path}' is too short. "
                "Minimum length is 1"
            )

    def _is_valid_triggering_file(self, triggering_file:str)->None:
        """Validation check for 'triggering_file' variable from main
        constructor."""
        valid_string(
            triggering_file,
            VALID_VARIABLE_NAME_CHARS,
            hint="FileEventPattern.triggering_file"
        )

    def _is_valid_recipe(self, recipe:str)->None:
        """Validation check for 'recipe' variable from main constructor.
        Called within parent BasePattern constructor."""
        valid_string(
            recipe,
            VALID_RECIPE_NAME_CHARS,
            hint="FileEventPattern.recipe"
        )

    def _is_valid_parameters(self, parameters:Dict[str,Any])->None:
        """Validation check for 'parameters' variable from main constructor.
        Called within parent BasePattern constructor."""
        valid_dict(
            parameters,
            str,
            Any,
            strict=False,
            min_length=0,
            hint="FileEventPattern.parameters"
        )
        for k in parameters.keys():
            valid_string(
                k,
                VALID_VARIABLE_NAME_CHARS,
                hint=f"FileEventPattern.parameters[{k}]"
            )

    def _is_valid_output(self, outputs:Dict[str,str])->None:
        """Validation check for 'output' variable from main constructor.
        Called within parent BasePattern constructor."""
        valid_dict(
            outputs,
            str,
            str,
            strict=False,
            min_length=0,
            hint="FileEventPattern.outputs"
        )
        for k in outputs.keys():
            valid_string(
                k,
                VALID_VARIABLE_NAME_CHARS,
                hint=f"FileEventPattern.outputs[{k}]"
            )

    def _is_valid_event_mask(self, event_mask)->None:
        """Validation check for 'event_mask' variable from main constructor."""
        valid_list(
            event_mask,
            str,
            min_length=1,
            hint="FileEventPattern.event_mask"
        )
        for mask in event_mask:
            if mask not in FILE_EVENTS + DIR_EVENTS:
                raise ValueError(f"Invalid event mask '{mask}'. Valid are: "
                    f"{FILE_EVENTS + DIR_EVENTS}")

    def _is_valid_sweep(self, sweep:Dict[str,Union[int,float,complex]])->None:
        """Validation check for 'sweep' variable from main constructor."""
        return super()._is_valid_sweep(sweep)

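# Illustrative sketch (not executed): a FileEventPattern that fires whenever
# a .txt file appears under 'start/' within the monitored base directory.
# The names "my_pattern", "my_recipe", "infile" and "outfile" are assumed for
# the example; the recipe name must match a recipe registered with the
# monitor.
#
#   pattern = FileEventPattern(
#       "my_pattern",                       # pattern name
#       os.path.join("start", "*.txt"),     # triggering path, relative to base
#       "my_recipe",                        # recipe to schedule on a match
#       "infile",                           # variable bound to the hit file
#       parameters={"rounds": 10},
#       outputs={"outfile": os.path.join("output", "result.txt")}
#   )
#
# By default the pattern responds to the events in _DEFAULT_MASK; pass
# event_mask explicitly to narrow or widen this.
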
class WatchdogMonitor(BaseMonitor):
    # A handler object, to catch events
    event_handler:PatternMatchingEventHandler
    # The watchdog observer object
    monitor:Observer
    # The base monitored directory
    base_dir:str
    # Config option, above which debug messages are ignored
    debug_level:int
    # Where print messages are sent
    _print_target:Any

    def __init__(self, base_dir:str, patterns:Dict[str,FileEventPattern],
            recipes:Dict[str,BaseRecipe], autostart=False, settletime:int=1,
            name:str="", print:Any=sys.stdout, logging:int=0)->None:
        """WatchdogMonitor Constructor. This uses the watchdog module to
        monitor a directory and all its sub-directories. Watchdog will provide
        the monitor with any caught events, which the monitor compares against
        its rules, informing the runner of any matches."""
        super().__init__(patterns, recipes, name=name)
        self._is_valid_base_dir(base_dir)
        self.base_dir = base_dir
        check_type(settletime, int, hint="WatchdogMonitor.settletime")
        self._print_target, self.debug_level = setup_debugging(print, logging)
        self.event_handler = WatchdogEventHandler(self, settletime=settletime)
        self.monitor = Observer()
        self.monitor.schedule(
            self.event_handler,
            self.base_dir,
            recursive=True
        )
        print_debug(self._print_target, self.debug_level,
            "Created new WatchdogMonitor instance", DEBUG_INFO)

        if autostart:
            self.start()

    def start(self)->None:
        """Function to start the monitor."""
        print_debug(self._print_target, self.debug_level,
            "Starting WatchdogMonitor", DEBUG_INFO)
        self._apply_retroactive_rules()
        self.monitor.start()

    def stop(self)->None:
        """Function to stop the monitor."""
        print_debug(self._print_target, self.debug_level,
            "Stopping WatchdogMonitor", DEBUG_INFO)
        self.monitor.stop()

    def match(self, event)->None:
        """Function to determine if a given event matches the current rules."""
        src_path = event.src_path

        prepend = "dir_" if event.is_directory else "file_"
        # 'event_type' is either a single watchdog type string, or a set of
        # types collected by the handler's settling logic
        if isinstance(event.event_type, str):
            event_types = [prepend+event.event_type]
        else:
            event_types = [prepend+i for i in event.event_type]

        # Remove the base dir from the path, as trigger paths are given
        # relative to it
        handle_path = src_path.replace(self.base_dir, '', 1)
        # Also remove leading slashes, so we don't go off of the root directory
        while handle_path.startswith(os.path.sep):
            handle_path = handle_path[1:]

        self._rules_lock.acquire()
        try:
            for rule in self._rules.values():

                # Skip events not within the event mask
                if not any(i in event_types for i in rule.pattern.event_mask):
                    continue

                # Use regex to match event paths against rule paths
                target_path = rule.pattern.triggering_path
                recursive_regexp = translate(target_path)
                if os.name == 'nt':
                    direct_regexp = recursive_regexp.replace(
                        '.*', '[^'+ os.path.sep + os.path.sep +']*')
                else:
                    direct_regexp = recursive_regexp.replace(
                        '.*', '[^'+ os.path.sep +']*')
                recursive_hit = match(recursive_regexp, handle_path)
                direct_hit = match(direct_regexp, handle_path)

                # If matched, then create a watchdog event
                if direct_hit or recursive_hit:
                    meow_event = create_watchdog_event(
                        event.src_path,
                        rule,
                        self.base_dir,
                        event.time_stamp,
                        get_hash(event.src_path, SHA256)
                    )
                    print_debug(self._print_target, self.debug_level,
                        f"Event at {src_path} hit rule {rule.name}",
                        DEBUG_INFO)
                    # Send the event to the runner
                    self.send_event_to_runner(meow_event)

        except Exception as e:
            self._rules_lock.release()
            raise e

        self._rules_lock.release()

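    # Note on the matching in match() above: fnmatch.translate turns a
    # triggering path such as "start/*.txt" into a regular expression where
    # '*' becomes '.*', so the recursive form also matches files in
    # sub-directories (e.g. "start/a/b.txt"). The direct form swaps '.*' for
    # a character class excluding the path separator, so it only matches
    # "start/b.txt". An event is forwarded if either form matches the event
    # path relative to base_dir.
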
    def _is_valid_base_dir(self, base_dir:str)->None:
        """Validation check for 'base_dir' variable from main constructor. Is
        automatically called during initialisation."""
        valid_dir_path(base_dir, must_exist=True)

    def _is_valid_patterns(self, patterns:Dict[str,FileEventPattern])->None:
        """Validation check for 'patterns' variable from main constructor. Is
        automatically called during initialisation."""
        valid_dict(patterns, str, FileEventPattern, min_length=0, strict=False)

    def _is_valid_recipes(self, recipes:Dict[str,BaseRecipe])->None:
        """Validation check for 'recipes' variable from main constructor. Is
        automatically called during initialisation."""
        valid_dict(recipes, str, BaseRecipe, min_length=0, strict=False)

    def _get_valid_pattern_types(self)->List[type]:
        return [FileEventPattern]

    def _get_valid_recipe_types(self)->List[type]:
        return [BaseRecipe]

    def _apply_retroactive_rule(self, rule:Rule)->None:
        """Function to determine if a rule should be applied to the existing
        file structure, as if that structure were created or modified now."""
        self._rules_lock.acquire()
        try:
            # Check in case the rule was deleted since this function was
            # first called
            if rule.name not in self._rules:
                self._rules_lock.release()
                return

            if FILE_RETROACTIVE_EVENT in rule.pattern.event_mask \
                    or DIR_RETROACTIVE_EVENT in rule.pattern.event_mask:
                # Determine what paths are potentially triggerable and gather
                # files at those paths
                testing_path = os.path.join(
                    self.base_dir, rule.pattern.triggering_path)

                globbed = glob.glob(testing_path)

                # For each file create a fake event.
                for globble in globbed:

                    meow_event = create_watchdog_event(
                        globble,
                        rule,
                        self.base_dir,
                        time(),
                        get_hash(globble, SHA256)
                    )
                    print_debug(self._print_target, self.debug_level,
                        f"Retroactive event for file at {globble} hit rule "
                        f"{rule.name}", DEBUG_INFO)
                    # Send it to the runner
                    self.send_event_to_runner(meow_event)

        except Exception as e:
            self._rules_lock.release()
            raise e
        self._rules_lock.release()

    def _apply_retroactive_rules(self)->None:
        """Function to determine if any rules should be applied to the
        existing file structure, as if that structure were created or
        modified now."""
        for rule in self._rules.values():
            self._apply_retroactive_rule(rule)

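# Illustrative sketch (not executed): wiring a WatchdogMonitor to a pattern
# and a recipe. 'some_recipe' is assumed to be an instance of a BaseRecipe
# subclass defined elsewhere in meow_base, and "monitored_dir" an existing
# directory.
#
#   patterns = {"my_pattern": pattern}      # e.g. the FileEventPattern above
#   recipes = {"my_recipe": some_recipe}
#
#   monitor = WatchdogMonitor(
#       "monitored_dir",
#       patterns,
#       recipes,
#       settletime=1    # seconds to wait for follow-up events on a path
#   )
#   monitor.start()
#   ...
#   monitor.stop()
#
# In typical use the monitor is handed to a meow_base runner, which receives
# the events passed to send_event_to_runner.
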
class WatchdogEventHandler(PatternMatchingEventHandler):
    # The monitor class running this handler
    monitor:WatchdogMonitor
    # A time to wait per event path, during which extra events are discarded
    _settletime:int
    # TODO clean this struct occasionally
    # A Dict of recent job timestamps
    _recent_jobs:Dict[str, Any]
    # A lock to solve race conditions on '_recent_jobs'
    _recent_jobs_lock:threading.Lock

    def __init__(self, monitor:WatchdogMonitor, settletime:int=1):
        """WatchdogEventHandler Constructor. This inherits from watchdog
        PatternMatchingEventHandler, and is used to catch events, then filter
        out excessive events at the same location."""
        super().__init__()
        self.monitor = monitor
        self._settletime = settletime
        self._recent_jobs = {}
        self._recent_jobs_lock = threading.Lock()

    def threaded_handler(self, event):
        """Function to determine if the given event shall be sent on to the
        monitor. After each event we wait for '_settletime', to catch
        subsequent events at the same location, so as to not swamp the system
        with repeated events."""

        self._recent_jobs_lock.acquire()
        try:
            if event.src_path in self._recent_jobs:
                if event.time_stamp > self._recent_jobs[event.src_path][0]:
                    self._recent_jobs[event.src_path][0] = event.time_stamp
                    self._recent_jobs[event.src_path][1].add(event.event_type)
                else:
                    self._recent_jobs_lock.release()
                    return
            else:
                self._recent_jobs[event.src_path] = \
                    [event.time_stamp, {event.event_type}]

            # If we have a closed event then short-cut the wait and send the
            # event immediately
            if event.event_type == FILE_CLOSED_EVENT:
                self.monitor.match(event)
                self._recent_jobs_lock.release()
                return

        except Exception as ex:
            self._recent_jobs_lock.release()
            raise ex
        self._recent_jobs_lock.release()

        sleep(self._settletime)

        self._recent_jobs_lock.acquire()
        try:
            if event.src_path in self._recent_jobs \
                    and event.time_stamp < self._recent_jobs[event.src_path][0]:
                self._recent_jobs_lock.release()
                return
        except Exception as ex:
            self._recent_jobs_lock.release()
            raise ex
        event.event_type = self._recent_jobs[event.src_path][1]
        self._recent_jobs_lock.release()

        self.monitor.match(event)

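    # Worked example of the settling behaviour above (assumed timings): with
    # settletime=1, a 'created' event for a path at t=0 followed by 'modified'
    # events at t=0.2 and t=0.4 spawns three handler threads, but only the
    # thread for the most recent event survives its sleep. It forwards a
    # single event whose event_type is the accumulated set
    # {'created', 'modified'}. A closed event skips the wait and is forwarded
    # to the monitor immediately.
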
    def handle_event(self, event):
        """Handler function, called by all specific event functions. Will
        attach a timestamp to the event immediately, and attempt to start a
        threaded_handler so that the monitor can resume monitoring as soon as
        possible."""
        event.time_stamp = time()

        waiting_for_threaded_resources = True
        while waiting_for_threaded_resources:
            try:
                worker = threading.Thread(
                    target=self.threaded_handler,
                    args=[event])
                worker.daemon = True
                worker.start()
                waiting_for_threaded_resources = False
            except threading.ThreadError:
                sleep(1)

    def on_created(self, event):
        """Function called when a file created event occurs."""
        self.handle_event(event)

    def on_modified(self, event):
        """Function called when a file modified event occurs."""
        self.handle_event(event)

    def on_moved(self, event):
        """Function called when a file moved event occurs."""
        self.handle_event(event)

    def on_deleted(self, event):
        """Function called when a file deleted event occurs."""
        self.handle_event(event)

    def on_closed(self, event):
        """Function called when a file closed event occurs."""
        self.handle_event(event)