""" This file contains definitions for a MEOW pattern based off of file events, along with an appropriate monitor for said events. Author(s): David Marchant """ import glob import threading import sys import os from fnmatch import translate from re import match from time import time, sleep from typing import Any, Union, Dict, List from watchdog.observers import Observer from watchdog.events import PatternMatchingEventHandler from meow_base.core.base_recipe import BaseRecipe from meow_base.core.base_monitor import BaseMonitor from meow_base.core.base_pattern import BasePattern from meow_base.core.meow import EVENT_KEYS, valid_meow_dict from meow_base.core.rule import Rule from meow_base.functionality.validation import check_type, valid_string, \ valid_dict, valid_list, valid_dir_path from meow_base.core.vars import VALID_RECIPE_NAME_CHARS, \ VALID_VARIABLE_NAME_CHARS, FILE_EVENTS, FILE_CREATE_EVENT, \ FILE_MODIFY_EVENT, FILE_MOVED_EVENT, DEBUG_INFO, DIR_EVENTS, \ FILE_RETROACTIVE_EVENT, SHA256, VALID_PATH_CHARS, FILE_CLOSED_EVENT, \ DIR_RETROACTIVE_EVENT from meow_base.functionality.debug import setup_debugging, print_debug from meow_base.functionality.hashing import get_hash from meow_base.functionality.meow import create_event # Events that are monitored by default _DEFAULT_MASK = [ FILE_CREATE_EVENT, FILE_MODIFY_EVENT, FILE_MOVED_EVENT, FILE_RETROACTIVE_EVENT, FILE_CLOSED_EVENT ] # watchdog events EVENT_TYPE_WATCHDOG = "watchdog" WATCHDOG_BASE = "monitor_base" WATCHDOG_HASH = "file_hash" WATCHDOG_EVENT_KEYS = { WATCHDOG_BASE: str, WATCHDOG_HASH: str, **EVENT_KEYS } def create_watchdog_event(path:str, rule:Any, base:str, time:float, hash:str, extras:Dict[Any,Any]={})->Dict[Any,Any]: """Function to create a MEOW event dictionary.""" return create_event( EVENT_TYPE_WATCHDOG, path, rule, time, extras={ **extras, **{ WATCHDOG_HASH: hash, WATCHDOG_BASE: base } } ) class FileEventPattern(BasePattern): # The path at which events will trigger this pattern triggering_path:str # The variable name given to the triggering file within recipe code triggering_file:str # Which types of event the pattern responds to event_mask:List[str] def __init__(self, name:str, triggering_path:str, recipe:str, triggering_file:str, event_mask:List[str]=_DEFAULT_MASK, parameters:Dict[str,Any]={}, outputs:Dict[str,Any]={}, sweep:Dict[str,Any]={}): """FileEventPattern Constructor. This is used to match against file system events, as caught by the python watchdog module.""" super().__init__(name, recipe, parameters, outputs, sweep) self._is_valid_triggering_path(triggering_path) self.triggering_path = triggering_path self._is_valid_triggering_file(triggering_file) self.triggering_file = triggering_file self._is_valid_event_mask(event_mask) self.event_mask = event_mask def _is_valid_triggering_path(self, triggering_path:str)->None: """Validation check for 'triggering_path' variable from main constructor.""" valid_string( triggering_path, VALID_PATH_CHARS+'*', min_length=1, hint="FileEventPattern.triggering_path" ) if len(triggering_path) < 1: raise ValueError ( f"triggering path '{triggering_path}' is too short. " "Minimum length is 1" ) def _is_valid_triggering_file(self, triggering_file:str)->None: """Validation check for 'triggering_file' variable from main constructor.""" valid_string( triggering_file, VALID_VARIABLE_NAME_CHARS, hint="FileEventPattern.triggering_file" ) def _is_valid_recipe(self, recipe:str)->None: """Validation check for 'recipe' variable from main constructor. 

class FileEventPattern(BasePattern):
    # The path at which events will trigger this pattern
    triggering_path:str
    # The variable name given to the triggering file within recipe code
    triggering_file:str
    # Which types of event the pattern responds to
    event_mask:List[str]

    def __init__(self, name:str, triggering_path:str, recipe:str,
            triggering_file:str, event_mask:List[str]=_DEFAULT_MASK,
            parameters:Dict[str,Any]={}, outputs:Dict[str,Any]={},
            sweep:Dict[str,Any]={}):
        """FileEventPattern Constructor. This is used to match against file
        system events, as caught by the python watchdog module."""
        super().__init__(name, recipe, parameters, outputs, sweep)
        self._is_valid_triggering_path(triggering_path)
        self.triggering_path = triggering_path
        self._is_valid_triggering_file(triggering_file)
        self.triggering_file = triggering_file
        self._is_valid_event_mask(event_mask)
        self.event_mask = event_mask

    def _is_valid_triggering_path(self, triggering_path:str)->None:
        """Validation check for 'triggering_path' variable from main
        constructor."""
        valid_string(
            triggering_path,
            VALID_PATH_CHARS+'*',
            min_length=1,
            hint="FileEventPattern.triggering_path"
        )
        if len(triggering_path) < 1:
            raise ValueError(
                f"triggering path '{triggering_path}' is too short. "
                "Minimum length is 1"
            )

    def _is_valid_triggering_file(self, triggering_file:str)->None:
        """Validation check for 'triggering_file' variable from main
        constructor."""
        valid_string(
            triggering_file,
            VALID_VARIABLE_NAME_CHARS,
            hint="FileEventPattern.triggering_file"
        )

    def _is_valid_recipe(self, recipe:str)->None:
        """Validation check for 'recipe' variable from main constructor.
        Called within parent BasePattern constructor."""
        valid_string(
            recipe,
            VALID_RECIPE_NAME_CHARS,
            hint="FileEventPattern.recipe"
        )

    def _is_valid_parameters(self, parameters:Dict[str,Any])->None:
        """Validation check for 'parameters' variable from main constructor.
        Called within parent BasePattern constructor."""
        valid_dict(
            parameters,
            str,
            Any,
            strict=False,
            min_length=0,
            hint="FileEventPattern.parameters"
        )
        for k in parameters.keys():
            valid_string(
                k,
                VALID_VARIABLE_NAME_CHARS,
                hint=f"FileEventPattern.parameters[{k}]"
            )

    def _is_valid_output(self, outputs:Dict[str,str])->None:
        """Validation check for 'output' variable from main constructor.
        Called within parent BasePattern constructor."""
        valid_dict(
            outputs,
            str,
            str,
            strict=False,
            min_length=0,
            hint="FileEventPattern.outputs"
        )
        for k in outputs.keys():
            valid_string(
                k,
                VALID_VARIABLE_NAME_CHARS,
                hint=f"FileEventPattern.outputs[{k}]"
            )

    def _is_valid_event_mask(self, event_mask)->None:
        """Validation check for 'event_mask' variable from main
        constructor."""
        valid_list(
            event_mask,
            str,
            min_length=1,
            hint="FileEventPattern.event_mask"
        )
        for mask in event_mask:
            if mask not in FILE_EVENTS + DIR_EVENTS:
                raise ValueError(f"Invalid event mask '{mask}'. Valid are: "
                    f"{FILE_EVENTS + DIR_EVENTS}")

    def _is_valid_sweep(self, sweep:Dict[str,Union[int,float,complex]])->None:
        """Validation check for 'sweep' variable from main constructor."""
        return super()._is_valid_sweep(sweep)
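
# Illustrative sketch (never called here): constructs a FileEventPattern that
# fires whenever a .txt file appears under an 'input' directory inside the
# monitored base directory. 'example_recipe' is a hypothetical recipe name
# that would need to match a recipe registered with the same runner.
def _example_file_event_pattern()->FileEventPattern:
    return FileEventPattern(
        "example_pattern",               # unique pattern name
        os.path.join("input", "*.txt"),  # triggering path, relative to base_dir
        "example_recipe",                # name of the recipe to run on a match
        "input_file",                    # variable name of the file in the recipe
        parameters={"threshold": 20},    # extra variables passed to the recipe
        outputs={"output_file": os.path.join("output", "result.txt")}
    )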

class WatchdogMonitor(BaseMonitor):
    # A handler object, to catch events
    event_handler:PatternMatchingEventHandler
    # The watchdog observer object
    monitor:Observer
    # The base monitored directory
    base_dir:str
    # Config option, above which debug messages are ignored
    debug_level:int
    # Where print messages are sent
    _print_target:Any

    def __init__(self, base_dir:str, patterns:Dict[str,FileEventPattern],
            recipes:Dict[str,BaseRecipe], autostart=False, settletime:int=1,
            name:str="", print:Any=sys.stdout, logging:int=0)->None:
        """WatchdogMonitor Constructor. This uses the watchdog module to
        monitor a directory and all its sub-directories. Watchdog will provide
        the monitor with any caught events, with the monitor comparing them
        against its rules, and informing the runner of any matches."""
        super().__init__(patterns, recipes, name=name)
        self._is_valid_base_dir(base_dir)
        self.base_dir = base_dir
        check_type(settletime, int, hint="WatchdogMonitor.settletime")
        self._print_target, self.debug_level = setup_debugging(print, logging)
        self.event_handler = WatchdogEventHandler(self, settletime=settletime)
        self.monitor = Observer()
        self.monitor.schedule(
            self.event_handler,
            self.base_dir,
            recursive=True
        )
        print_debug(self._print_target, self.debug_level,
            "Created new WatchdogMonitor instance", DEBUG_INFO)

        if autostart:
            self.start()

    def start(self)->None:
        """Function to start the monitor."""
        print_debug(self._print_target, self.debug_level,
            "Starting WatchdogMonitor", DEBUG_INFO)
        self._apply_retroactive_rules()
        self.monitor.start()

    def stop(self)->None:
        """Function to stop the monitor."""
        print_debug(self._print_target, self.debug_level,
            "Stopping WatchdogMonitor", DEBUG_INFO)
        self.monitor.stop()

    def match(self, event)->None:
        """Function to determine if a given event matches the current
        rules."""
        src_path = event.src_path

        prepend = "dir_" if event.is_directory else "file_"
        event_types = [prepend+i for i in event.event_type]

        # Remove the base dir from the path, as trigger paths are given
        # relative to that
        handle_path = src_path.replace(self.base_dir, '', 1)
        # Also remove leading slashes, so we don't go off of the root directory
        while handle_path.startswith(os.path.sep):
            handle_path = handle_path[1:]

        self._rules_lock.acquire()
        try:
            for rule in self._rules.values():
                # Skip events not within the event mask
                if not any(i in event_types for i in rule.pattern.event_mask):
                    continue

                # Use regex to match event paths against rule paths
                target_path = rule.pattern.triggering_path
                recursive_regexp = translate(target_path)
                if os.name == 'nt':
                    direct_regexp = recursive_regexp.replace(
                        '.*', '[^'+ os.path.sep + os.path.sep +']*')
                else:
                    direct_regexp = recursive_regexp.replace(
                        '.*', '[^'+ os.path.sep +']*')
                recursive_hit = match(recursive_regexp, handle_path)
                direct_hit = match(direct_regexp, handle_path)

                # If matched, then create a watchdog event
                if direct_hit or recursive_hit:
                    meow_event = create_watchdog_event(
                        event.src_path,
                        rule,
                        self.base_dir,
                        event.time_stamp,
                        get_hash(event.src_path, SHA256)
                    )
                    print_debug(self._print_target, self.debug_level,
                        f"Event at {src_path} hit rule {rule.name}",
                        DEBUG_INFO)
                    # Send the event to the runner
                    self.send_event_to_runner(meow_event)

        except Exception as e:
            self._rules_lock.release()
            raise e

        self._rules_lock.release()

    def _is_valid_base_dir(self, base_dir:str)->None:
        """Validation check for 'base_dir' variable from main constructor.
        Is automatically called during initialisation."""
        valid_dir_path(base_dir, must_exist=True)

    def _is_valid_patterns(self, patterns:Dict[str,FileEventPattern])->None:
        """Validation check for 'patterns' variable from main constructor.
        Is automatically called during initialisation."""
        valid_dict(patterns, str, FileEventPattern, min_length=0, strict=False)

    def _is_valid_recipes(self, recipes:Dict[str,BaseRecipe])->None:
        """Validation check for 'recipes' variable from main constructor.
        Is automatically called during initialisation."""
        valid_dict(recipes, str, BaseRecipe, min_length=0, strict=False)

    def _get_valid_pattern_types(self)->List[type]:
        return [FileEventPattern]

    def _get_valid_recipe_types(self)->List[type]:
        return [BaseRecipe]

    def _apply_retroactive_rule(self, rule:Rule)->None:
        """Function to determine if a rule should be applied to the existing
        file structure, were the file structure created/modified now."""
        self._rules_lock.acquire()
        try:
            # Check in case the rule was deleted since this function was
            # first called
            if rule.name not in self._rules:
                self._rules_lock.release()
                return

            if FILE_RETROACTIVE_EVENT in rule.pattern.event_mask \
                    or DIR_RETROACTIVE_EVENT in rule.pattern.event_mask:
                # Determine what paths are potentially triggerable and gather
                # files at those paths
                testing_path = os.path.join(
                    self.base_dir, rule.pattern.triggering_path)

                globbed = glob.glob(testing_path)

                # For each file create a fake event
                for globble in globbed:
                    meow_event = create_watchdog_event(
                        globble,
                        rule,
                        self.base_dir,
                        time(),
                        get_hash(globble, SHA256)
                    )
                    print_debug(self._print_target, self.debug_level,
                        f"Retroactive event for file at {globble} hit rule "
                        f"{rule.name}", DEBUG_INFO)
                    # Send it to the runner
                    self.send_event_to_runner(meow_event)

        except Exception as e:
            self._rules_lock.release()
            raise e
        self._rules_lock.release()

    def _apply_retroactive_rules(self)->None:
        """Function to determine if any rules should be applied to the
        existing file structure, were the file structure created/modified
        now."""
        for rule in self._rules.values():
            self._apply_retroactive_rule(rule)
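
# Illustrative sketch (never called here): wires a pattern and an
# already-constructed recipe into a WatchdogMonitor. The recipe is taken as
# an argument since this module only defines BaseRecipe; it is assumed to
# expose a 'name' attribute, as meow_base recipes do. 'job_input' is a
# hypothetical base directory, which must exist before construction.
def _example_monitor_setup(recipe:BaseRecipe)->WatchdogMonitor:
    patterns = {
        "example_pattern": FileEventPattern(
            "example_pattern",
            os.path.join("input", "*.txt"),
            recipe.name,
            "input_file"
        )
    }
    recipes = {recipe.name: recipe}
    # autostart=False, as a runner would normally call start() itself once it
    # has attached the monitor
    return WatchdogMonitor("job_input", patterns, recipes, autostart=False)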

class WatchdogEventHandler(PatternMatchingEventHandler):
    # The monitor class running this handler
    monitor:WatchdogMonitor
    # A time to wait per event path, during which extra events are discarded
    _settletime:int
    # TODO clean this struct occasionally
    # A Dict of recent job timestamps
    _recent_jobs:Dict[str, Any]
    # A lock to solve race conditions on '_recent_jobs'
    _recent_jobs_lock:threading.Lock

    def __init__(self, monitor:WatchdogMonitor, settletime:int=1):
        """WatchdogEventHandler Constructor. This inherits from watchdog
        PatternMatchingEventHandler, and is used to catch events, then filter
        out excessive events at the same location."""
        super().__init__()
        self.monitor = monitor
        self._settletime = settletime
        self._recent_jobs = {}
        self._recent_jobs_lock = threading.Lock()

    def threaded_handler(self, event):
        """Function to determine if the given event shall be sent on to the
        monitor. After each event we wait for '_settletime', to catch
        subsequent events at the same location, so as to not swamp the system
        with repeated events."""
        self._recent_jobs_lock.acquire()
        try:
            if event.src_path in self._recent_jobs:
                if event.time_stamp > self._recent_jobs[event.src_path][0]:
                    self._recent_jobs[event.src_path][0] = event.time_stamp
                    self._recent_jobs[event.src_path][1].add(event.event_type)
                else:
                    self._recent_jobs_lock.release()
                    return
            else:
                self._recent_jobs[event.src_path] = \
                    [event.time_stamp, {event.event_type}]

            # If we have a closed event then short-cut the wait and send the
            # event immediately
            if event.event_type == FILE_CLOSED_EVENT:
                self.monitor.match(event)
                self._recent_jobs_lock.release()
                return

        except Exception as ex:
            self._recent_jobs_lock.release()
            raise Exception(ex)
        self._recent_jobs_lock.release()

        # Wait out the settle time to catch any subsequent events at the
        # same location
        sleep(self._settletime)

        self._recent_jobs_lock.acquire()
        try:
            if event.src_path in self._recent_jobs \
                    and event.time_stamp < self._recent_jobs[event.src_path][0]:
                self._recent_jobs_lock.release()
                return
        except Exception as ex:
            self._recent_jobs_lock.release()
            raise Exception(ex)
        event.event_type = self._recent_jobs[event.src_path][1]
        self._recent_jobs_lock.release()

        self.monitor.match(event)
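
    # Illustrative timeline of the settle behaviour above, assuming the
    # default settletime of 1 second and events on a hypothetical file
    # 'input/data.txt':
    #   t=0.0  a 'created' event arrives; its path and timestamp are recorded
    #          in _recent_jobs and its handler thread goes to sleep
    #   t=0.3  a 'modified' event for the same path arrives; it updates the
    #          recorded timestamp, adds itself to the recorded set of event
    #          types, and its own handler thread goes to sleep
    #   t=1.0  the first thread wakes, sees a newer recorded timestamp than
    #          its own, and returns without sending anything
    #   t=1.3  the second thread wakes, is still the newest, replaces
    #          event.event_type with the accumulated set
    #          {'created', 'modified'}, and forwards a single combined event
    #          to the monitor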

    def handle_event(self, event):
        """Handler function, called by all specific event functions. Will
        attach a timestamp to the event immediately, and attempt to start a
        threaded_handler so that the monitor can resume monitoring as soon as
        possible."""
        event.time_stamp = time()

        waiting_for_threaded_resources = True
        while waiting_for_threaded_resources:
            try:
                worker = threading.Thread(
                    target=self.threaded_handler,
                    args=[event])
                worker.daemon = True
                worker.start()
                waiting_for_threaded_resources = False
            except threading.ThreadError:
                sleep(1)

    def on_created(self, event):
        """Function called when a file created event occurs."""
        self.handle_event(event)

    def on_modified(self, event):
        """Function called when a file modified event occurs."""
        self.handle_event(event)

    def on_moved(self, event):
        """Function called when a file moved event occurs."""
        self.handle_event(event)

    def on_deleted(self, event):
        """Function called when a file deleted event occurs."""
        self.handle_event(event)

    def on_closed(self, event):
        """Function called when a file closed event occurs."""
        self.handle_event(event)