From 9a073fc9aa99bbdb4b8ef75f61fd64545b78cd37 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 19 Mar 2025 15:24:02 +0100 Subject: [PATCH 01/21] WIP - pluggy refactor --- .../blueprint/imports/__init__.py | 2 +- .../blueprint/imports/importer.py | 2 +- .../blueprint/price_data_follower/__init__.py | 10 +- .../blueprint/price_data_follower/flags.py | 3 + .../blueprint/settings/__init__.py | 40 +++ .../settings/templates/settings.html | 3 +- changedetectionio/blueprint/ui/edit.py | 36 +- changedetectionio/blueprint/ui/views.py | 2 +- changedetectionio/flask_app.py | 5 +- changedetectionio/forms.py | 52 ++- changedetectionio/model/App.py | 1 + changedetectionio/processors/__init__.py | 310 +++++++++++++----- changedetectionio/processors/constants.py | 5 + .../processors/example_processor_plugin.py | 162 +++++++++ .../processors/pluggy_interface.py | 64 ++++ .../processors/test_plugin_example.py | 46 +++ .../processors/text_json_diff/processor.py | 2 +- changedetectionio/static/js/plugins.js | 18 + changedetectionio/store.py | 2 +- changedetectionio/templates/_helpers.html | 4 + changedetectionio/templates/settings.html | 310 ------------------ .../tests/test_processor_plugins.py | 120 +++++++ changedetectionio/update_worker.py | 22 +- 23 files changed, 777 insertions(+), 444 deletions(-) create mode 100644 changedetectionio/blueprint/price_data_follower/flags.py create mode 100644 changedetectionio/processors/constants.py create mode 100644 changedetectionio/processors/example_processor_plugin.py create mode 100644 changedetectionio/processors/pluggy_interface.py create mode 100644 changedetectionio/processors/test_plugin_example.py delete mode 100644 changedetectionio/templates/settings.html create mode 100644 changedetectionio/tests/test_processor_plugins.py diff --git a/changedetectionio/blueprint/imports/__init__.py b/changedetectionio/blueprint/imports/__init__.py index e0dd12bdc96..bc695f74e6b 100644 --- a/changedetectionio/blueprint/imports/__init__.py +++ b/changedetectionio/blueprint/imports/__init__.py @@ -63,7 +63,7 @@ def import_page(): update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) # Could be some remaining, or we could be on GET - form = forms.importForm(formdata=request.form if request.method == 'POST' else None) + form = forms.importForm(formdata=request.form if request.method == 'POST' else None, datastore=datastore) output = render_template("import.html", form=form, import_url_list_remaining="\n".join(remaining_urls), diff --git a/changedetectionio/blueprint/imports/importer.py b/changedetectionio/blueprint/imports/importer.py index 4824d13890e..1a6bd3d77ee 100644 --- a/changedetectionio/blueprint/imports/importer.py +++ b/changedetectionio/blueprint/imports/importer.py @@ -3,7 +3,6 @@ from wtforms import ValidationError from loguru import logger -from changedetectionio.forms import validate_url class Importer(): @@ -151,6 +150,7 @@ def run(self, self.new_uuids = [] from openpyxl import load_workbook + from changedetectionio.forms import validate_url try: wb = load_workbook(data) diff --git a/changedetectionio/blueprint/price_data_follower/__init__.py b/changedetectionio/blueprint/price_data_follower/__init__.py index 6011303a267..018d54fbad2 100644 --- a/changedetectionio/blueprint/price_data_follower/__init__.py +++ b/changedetectionio/blueprint/price_data_follower/__init__.py @@ -1,18 +1,14 @@ -from changedetectionio.strtobool import strtobool from flask import Blueprint, flash, redirect, url_for from flask_login import login_required -from changedetectionio.store import ChangeDetectionStore -from changedetectionio import queuedWatchMetaData from queue import PriorityQueue +from changedetectionio import queuedWatchMetaData +from changedetectionio.processors.constants import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT -PRICE_DATA_TRACK_ACCEPT = 'accepted' -PRICE_DATA_TRACK_REJECT = 'rejected' -def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue): +def construct_blueprint(datastore, update_q: PriorityQueue): price_data_follower_blueprint = Blueprint('price_data_follower', __name__) - @login_required @price_data_follower_blueprint.route("//accept", methods=['GET']) def accept(uuid): diff --git a/changedetectionio/blueprint/price_data_follower/flags.py b/changedetectionio/blueprint/price_data_follower/flags.py new file mode 100644 index 00000000000..652587f708f --- /dev/null +++ b/changedetectionio/blueprint/price_data_follower/flags.py @@ -0,0 +1,3 @@ + +PRICE_DATA_TRACK_ACCEPT = 'accepted' +PRICE_DATA_TRACK_REJECT = 'rejected' \ No newline at end of file diff --git a/changedetectionio/blueprint/settings/__init__.py b/changedetectionio/blueprint/settings/__init__.py index 5375b565cc0..2614794ad69 100644 --- a/changedetectionio/blueprint/settings/__init__.py +++ b/changedetectionio/blueprint/settings/__init__.py @@ -84,6 +84,45 @@ def settings_page(): # Convert to ISO 8601 format, all date/time relative events stored as UTC time utc_time = datetime.now(ZoneInfo("UTC")).isoformat() + + # Get processor plugins info + from changedetectionio.processors import get_all_plugins_info + plugins_info = get_all_plugins_info() + + # Create/update form with plugins info + default = deepcopy(datastore.data['settings']) + form = forms.globalSettingsForm( + formdata=request.form if request.method == 'POST' else None, + data=default, + extra_notification_tokens=datastore.get_unique_notification_tokens_available(), + plugins_info=plugins_info + ) + + # Process settings including plugin toggles + if request.method == 'POST' and form.validate(): + # Process the main form data + app_update = dict(deepcopy(form.data['application'])) + + # Don't update password with '' or False (Added by wtforms when not in submission) + if 'password' in app_update and not app_update['password']: + del (app_update['password']) + + datastore.data['settings']['application'].update(app_update) + datastore.data['settings']['requests'].update(form.data['requests']) + + # Update plugin settings from the dynamically created fields + enabled_plugins = {} + if hasattr(form, 'plugins'): + for field_name, field in form.plugins._fields.items(): + if field_name.startswith('plugin_'): + plugin_name = field_name.replace('plugin_', '') + enabled_plugins[plugin_name] = field.data + + # Update the datastore with plugin settings + datastore.data['settings']['application']['enabled_plugins'] = enabled_plugins + + datastore.needs_write_urgent = True + flash("Settings updated.") output = render_template("settings.html", api_key=datastore.data['settings']['application'].get('api_access_token'), @@ -93,6 +132,7 @@ def settings_page(): form=form, hide_remove_pass=os.getenv("SALTED_PASS", False), min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)), + plugins_info=plugins_info, settings_application=datastore.data['settings']['application'], timezone_default_config=datastore.data['settings']['application'].get('timezone'), utc_time=utc_time, diff --git a/changedetectionio/blueprint/settings/templates/settings.html b/changedetectionio/blueprint/settings/templates/settings.html index 1dfeba0df49..cc79f0fd6c9 100644 --- a/changedetectionio/blueprint/settings/templates/settings.html +++ b/changedetectionio/blueprint/settings/templates/settings.html @@ -9,6 +9,7 @@ const email_notification_prefix=JSON.parse('{{emailprefix|tojson}}'); {% endif %} + @@ -300,7 +301,7 @@

Chrome Extension

{{ render_button(form.save_button) }} Back - Clear Snapshot History + Clear Snapshot History
diff --git a/changedetectionio/blueprint/ui/edit.py b/changedetectionio/blueprint/ui/edit.py index 73cd785308d..3a9d4532639 100644 --- a/changedetectionio/blueprint/ui/edit.py +++ b/changedetectionio/blueprint/ui/edit.py @@ -24,7 +24,6 @@ def _watch_has_tag_options_set(watch): # https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists # https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ? def edit_page(uuid): - from changedetectionio import forms from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config from changedetectionio import processors import importlib @@ -43,7 +42,7 @@ def edit_page(uuid): switch_processor = request.args.get('switch_processor') if switch_processor: - for p in processors.available_processors(): + for p in processors.available_processors(datastore): if p[0] == switch_processor: datastore.data['watching'][uuid]['processor'] = switch_processor flash(f"Switched to mode - {p[1]}.") @@ -61,31 +60,19 @@ def edit_page(uuid): default['proxy'] = '' # proxy_override set to the json/text list of the items - # Does it use some custom form? does one exist? - processor_name = datastore.data['watching'][uuid].get('processor', '') - processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None) - if not processor_classes: - flash(f"Cannot load the edit form for processor/plugin '{processor_classes[1]}', plugin missing?", 'error') + # Get the appropriate form class for this processor using the pluggy system + processor_name = datastore.data['watching'][uuid].get('processor', 'text_json_diff') + form_class = processors.get_form_class_for_processor(processor_name) + + if not form_class: + flash(f"Cannot load the edit form for processor/plugin '{processor_name}', plugin missing?", 'error') return redirect(url_for('index')) - parent_module = processors.get_parent_module(processor_classes[0]) - - try: - # Get the parent of the "processor.py" go up one, get the form (kinda spaghetti but its reusing existing code) - forms_module = importlib.import_module(f"{parent_module.__name__}.forms") - # Access the 'processor_settings_form' class from the 'forms' module - form_class = getattr(forms_module, 'processor_settings_form') - except ModuleNotFoundError as e: - # .forms didnt exist - form_class = forms.processor_text_json_diff_form - except AttributeError as e: - # .forms exists but no useful form - form_class = forms.processor_text_json_diff_form - form = form_class(formdata=request.form if request.method == 'POST' else None, data=default, extra_notification_tokens=default.extra_notification_token_values(), - default_system_settings=datastore.data['settings'] + default_system_settings=datastore.data['settings'], + datastore=datastore ) # For the form widget tag UUID back to "string name" for the field @@ -165,7 +152,8 @@ def edit_page(uuid): datastore.data['watching'][uuid]['tags'] = [] # Recast it if need be to right data Watch handler - watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor')) + processor_name = form.data.get('processor') + watch_class = processors.get_watch_model_for_processor(processor_name) datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, default=datastore.data['watching'][uuid]) flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.") @@ -236,7 +224,7 @@ def edit_page(uuid): # Only works reliably with Playwright template_args = { - 'available_processors': processors.available_processors(), + 'available_processors': processors.available_processors(datastore), 'available_timezones': sorted(available_timezones()), 'browser_steps_config': browser_step_ui_config, 'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), diff --git a/changedetectionio/blueprint/ui/views.py b/changedetectionio/blueprint/ui/views.py index 903a4c7778f..b48a25d67f8 100644 --- a/changedetectionio/blueprint/ui/views.py +++ b/changedetectionio/blueprint/ui/views.py @@ -191,7 +191,7 @@ def diff_history_page(uuid): @login_optionally_required def form_quick_watch_add(): from changedetectionio import forms - form = forms.quickWatchForm(request.form) + form = forms.quickWatchForm(request.form, datastore=datastore) if not form.validate(): for widget, l in form.errors.items(): diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index 232ad94468a..f4d9f0dff1b 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -75,6 +75,7 @@ # Disables caching of the templates app.config['TEMPLATES_AUTO_RELOAD'] = True app.jinja_env.add_extension('jinja2.ext.loopcontrols') +app.jinja_env.globals.update(hasattr=hasattr) csrf = CSRFProtect() csrf.init_app(app) notification_debug_log=[] @@ -343,7 +344,7 @@ def before_request_handle_cookie_x_settings(): @login_optionally_required def index(): global datastore - from changedetectionio import forms + from changedetectionio.forms import quickWatchForm active_tag_req = request.args.get('tag', '').lower().strip() active_tag_uuid = active_tag = None @@ -394,7 +395,7 @@ def index(): else: sorted_watches.append(watch) - form = forms.quickWatchForm(request.form) + form = quickWatchForm(request.form, datastore=datastore) page = request.args.get(get_page_parameter(), type=int, default=1) total_count = len(sorted_watches) diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index 3fd199bb5c9..3d204c117e1 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -23,7 +23,7 @@ from flask_wtf.file import FileField, FileAllowed from wtforms.fields import FieldList -from wtforms.validators import ValidationError +from wtforms.validators import ValidationError, Optional from validators.url import url as url_validator @@ -508,8 +508,17 @@ class quickWatchForm(Form): url = fields.URLField('URL', validators=[validateURL()]) tags = StringTagUUID('Group tag', [validators.Optional()]) watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"}) - processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff") + processor = RadioField(u'Processor', default="text_json_diff") edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"}) + + def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs): + super().__init__(formdata, obj, prefix, data, meta, **kwargs) + # Set processor choices based on datastore if available + datastore = kwargs.get('datastore') + if datastore: + self.processor.choices = self.processors.available_processors(datastore) + else: + self.processor.choices = self.processors.available_processors() @@ -522,6 +531,13 @@ def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **k self.notification_body.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) + + # Set processor choices based on datastore if available + datastore = kwargs.get('datastore') + if datastore: + self.processor.choices = self.processors.available_processors(datastore) + else: + self.processor.choices = self.processors.available_processors() extract_title_as_title = BooleanField('Extract from document and use as watch title', default=False) fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) @@ -529,17 +545,26 @@ def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **k notification_format = SelectField('Notification format', choices=valid_notification_formats.keys()) notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()]) notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()]) - processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff") + processor = RadioField( label=u"Processor - What do you want to achieve?", default="text_json_diff") timezone = StringField("Timezone for watch schedule", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()]) webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")]) class importForm(Form): from . import processors - processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff") + processor = RadioField(u'Processor', default="text_json_diff") urls = TextAreaField('URLs') xlsx_file = FileField('Upload .xlsx file', validators=[FileAllowed(['xlsx'], 'Must be .xlsx file!')]) file_mapping = SelectField('File mapping', [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')}) + + def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs): + super().__init__(formdata, obj, prefix, data, meta, **kwargs) + # Set processor choices based on datastore if available + datastore = kwargs.get('datastore') + if datastore: + self.processor.choices = self.processors.available_processors(datastore) + else: + self.processor.choices = self.processors.available_processors() class SingleBrowserStep(Form): @@ -714,11 +739,12 @@ class globalSettingsRequestForm(Form): default_ua = FormField(DefaultUAInputForm, label="Default User-Agent overrides") def validate_extra_proxies(self, extra_validators=None): - for e in self.data['extra_proxies']: - if e.get('proxy_name') or e.get('proxy_url'): - if not e.get('proxy_name','').strip() or not e.get('proxy_url','').strip(): - self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.') - return False + if self.data.get('extra_proxies'): + for e in self.data['extra_proxies']: + if e.get('proxy_name') or e.get('proxy_url'): + if not e.get('proxy_name','').strip() or not e.get('proxy_url','').strip(): + self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.') + return False # datastore.data['settings']['application'].. @@ -749,6 +775,14 @@ class globalSettingsApplicationForm(commonSettingsForm): validators=[validators.NumberRange(min=0, message="Should contain zero or more attempts")]) + # Create plugins form and add it as an attribute +# plugin_form = PluginsManagementForm( +# formdata=formdata, +# plugins_info=plugins_info, +# enabled_plugins=enabled_plugins +# ) + + class globalSettingsForm(Form): # Define these as FormFields/"sub forms", this way it matches the JSON storage diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index 4c9c34fec05..b4a55fd768a 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -54,6 +54,7 @@ class model(dict): 'webdriver_delay': None , # Extra delay in seconds before extracting text 'tags': {}, #@todo use Tag.model initialisers 'timezone': None, # Default IANA timezone name + 'enabled_plugins': {} # Dictionary of plugin names and their enabled status } } } diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py index adf08a18a9f..299a2ebe44c 100644 --- a/changedetectionio/processors/__init__.py +++ b/changedetectionio/processors/__init__.py @@ -9,6 +9,8 @@ import os import pkgutil import re +import sys +from .pluggy_interface import plugin_manager, hookimpl class difference_detection_processor(): @@ -172,83 +174,245 @@ def run_changedetection(self, watch): return changed_detected, update_obj, ''.encode('utf-8') -def find_sub_packages(package_name): +def get_all_plugins_info(): """ - Find all sub-packages within the given package. - - :param package_name: The name of the base package to scan for sub-packages. - :return: A list of sub-package names. + Get information about all registered processor plugins + :return: A list of dictionaries with plugin info """ - package = importlib.import_module(package_name) - return [name for _, name, is_pkg in pkgutil.iter_modules(package.__path__) if is_pkg] - - -def find_processors(): + plugins_info = [] + + # Collect from all registered plugins + for plugin in plugin_manager.get_plugins(): + if hasattr(plugin, "get_processor_name") and hasattr(plugin, "get_processor_description"): + processor_name = plugin.get_processor_name() + description = plugin.get_processor_description() + + # Get version if available + version = "N/A" + if hasattr(plugin, "get_processor_version"): + plugin_version = plugin.get_processor_version() + if plugin_version: + version = plugin_version + + if processor_name and description: + plugins_info.append({ + "name": processor_name, + "description": description, + "version": version + }) + + # Fallback if no plugins registered + if not plugins_info: + plugins_info = [ + {"name": "text_json_diff", "description": "Webpage Text/HTML, JSON and PDF changes", "version": "1.0.0"}, + {"name": "restock_diff", "description": "Re-stock & Price detection for single product pages", "version": "1.0.0"} + ] + + return plugins_info + +def available_processors(datastore=None): """ - Find all subclasses of DifferenceDetectionProcessor in the specified package. - - :param package_name: The name of the package to scan for processor modules. - :return: A list of (module, class) tuples. + Get a list of processors by name and description for the UI elements + Filtered by enabled_plugins setting if datastore is provided + :return: A list of tuples (processor_name, description) """ - package_name = "changedetectionio.processors" # Name of the current package/module - - processors = [] - sub_packages = find_sub_packages(package_name) - - for sub_package in sub_packages: - module_name = f"{package_name}.{sub_package}.processor" + plugins_info = get_all_plugins_info() + processor_list = [] + + # If datastore is provided, filter by enabled_plugins + if datastore: + # Make sure enabled_plugins exists in datastore + if 'enabled_plugins' not in datastore.data['settings']['application']: + datastore.data['settings']['application']['enabled_plugins'] = {} + + enabled_plugins = datastore.data['settings']['application']['enabled_plugins'] + + # Scan for any new plugins that aren't in the enabled_plugins dict yet + # Default built-in processors to enabled, third-party to disabled + plugins_updated = False + for plugin in plugins_info: + if plugin["name"] not in enabled_plugins: + # Built-in processors are enabled by default + if plugin["name"] in ["text_json_diff", "restock_diff"]: + enabled_plugins[plugin["name"]] = True + else: + # Third-party plugins are disabled by default + enabled_plugins[plugin["name"]] = False + plugins_updated = True + + # Save changes if we added new plugins + if plugins_updated: + datastore.needs_write = True + + # Only include enabled plugins + for plugin in plugins_info: + if enabled_plugins.get(plugin["name"], False): + processor_list.append((plugin["name"], plugin["description"])) + else: + # No datastore provided, include all plugins + for plugin in plugins_info: + processor_list.append((plugin["name"], plugin["description"])) + + return processor_list + +def get_processor_handler(processor_name, datastore, watch_uuid): + """ + Get the processor handler for the specified processor name + :return: The processor handler instance + """ + # Try each plugin in turn + for plugin in plugin_manager.get_plugins(): + if hasattr(plugin, "perform_site_check"): + handler = plugin.perform_site_check(datastore=datastore, watch_uuid=watch_uuid) + if handler: + return handler + + # If no plugins handled it, use the appropriate built-in processor + watch = datastore.data['watching'].get(watch_uuid) + if watch and watch.get('processor') == 'restock_diff': + from .restock_diff.processor import perform_site_check + return perform_site_check(datastore=datastore, watch_uuid=watch_uuid) + else: + # Default to text_json_diff + from .text_json_diff.processor import perform_site_check + return perform_site_check(datastore=datastore, watch_uuid=watch_uuid) + +def get_form_class_for_processor(processor_name): + """ + Get the form class for the specified processor name + :return: The form class + """ + # Try each plugin in turn + for plugin in plugin_manager.get_plugins(): + if hasattr(plugin, "get_form_class"): + form_class = plugin.get_form_class(processor_name=processor_name) + if form_class: + return form_class + + # If no plugins provided a form class, use the appropriate built-in form + if processor_name == 'restock_diff': try: - module = importlib.import_module(module_name) - - # Iterate through all classes in the module - for name, obj in inspect.getmembers(module, inspect.isclass): - if issubclass(obj, difference_detection_processor) and obj is not difference_detection_processor: - processors.append((module, sub_package)) - except (ModuleNotFoundError, ImportError) as e: - logger.warning(f"Failed to import module {module_name}: {e} (find_processors())") - - return processors - - -def get_parent_module(module): - module_name = module.__name__ - if '.' not in module_name: - return None # Top-level module has no parent - parent_module_name = module_name.rsplit('.', 1)[0] - try: - return importlib.import_module(parent_module_name) - except Exception as e: - pass - - return False - - - -def get_custom_watch_obj_for_processor(processor_name): - from changedetectionio.model import Watch - watch_class = Watch.model - processor_classes = find_processors() - custom_watch_obj = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None) - if custom_watch_obj: - # Parent of .processor.py COULD have its own Watch implementation - parent_module = get_parent_module(custom_watch_obj[0]) - if hasattr(parent_module, 'Watch'): - watch_class = parent_module.Watch - - return watch_class - - -def available_processors(): + from .restock_diff.forms import processor_settings_form + return processor_settings_form + except ImportError: + pass + + # Default to text_json_diff form + from changedetectionio import forms + return forms.processor_text_json_diff_form + +def get_watch_model_for_processor(processor_name): """ - Get a list of processors by name and description for the UI elements - :return: A list :) + Get the Watch model class for the specified processor name + :return: The Watch model class """ - - processor_classes = find_processors() - - available = [] - for package, processor_class in processor_classes: - available.append((processor_class, package.name)) - - return available - + # Try each plugin in turn + for plugin in plugin_manager.get_plugins(): + if hasattr(plugin, "get_watch_model_class"): + model_class = plugin.get_watch_model_class(processor_name=processor_name) + if model_class: + return model_class + + # Default to standard Watch model + from changedetectionio.model import Watch + return Watch.model + +# Define plugin implementations for the built-in processors +class TextJsonDiffPlugin: + @hookimpl + def get_processor_name(self): + return "text_json_diff" + + @hookimpl + def get_processor_description(self): + from .text_json_diff.processor import name + return name + + @hookimpl + def get_processor_version(self): + return "1.0.0" + + @hookimpl + def perform_site_check(self, datastore, watch_uuid): + watch = datastore.data['watching'].get(watch_uuid) + if watch and watch.get('processor', 'text_json_diff') == 'text_json_diff': + from .text_json_diff.processor import perform_site_check + return perform_site_check(datastore=datastore, watch_uuid=watch_uuid) + return None + + @hookimpl + def get_form_class(self, processor_name): + if processor_name == 'text_json_diff': + from changedetectionio import forms + return forms.processor_text_json_diff_form + return None + + @hookimpl + def get_watch_model_class(self, processor_name): + if processor_name == 'text_json_diff': + from changedetectionio.model import Watch + return Watch.model + return None + +class RestockDiffPlugin: + @hookimpl + def get_processor_name(self): + return "restock_diff" + + @hookimpl + def get_processor_description(self): + from .restock_diff.processor import name + return name + + @hookimpl + def get_processor_version(self): + return "1.0.0" + + @hookimpl + def perform_site_check(self, datastore, watch_uuid): + watch = datastore.data['watching'].get(watch_uuid) + if watch and watch.get('processor') == 'restock_diff': + from .restock_diff.processor import perform_site_check + return perform_site_check(datastore=datastore, watch_uuid=watch_uuid) + return None + + @hookimpl + def get_form_class(self, processor_name): + if processor_name == 'restock_diff': + try: + from .restock_diff.forms import processor_settings_form + return processor_settings_form + except ImportError: + pass + return None + + @hookimpl + def get_watch_model_class(self, processor_name): + if processor_name == 'restock_diff': + # Currently uses default watch model, could be customized in the future + from changedetectionio.model import Watch + return Watch.model + return None + +# Import our example plugins +from .example_processor_plugin import ExampleProcessorPlugin + +# For backward compatibility +def get_custom_watch_obj_for_processor(processor_name): + return get_watch_model_for_processor(processor_name) + +# Register the built-in processor plugins +plugin_manager.register(TextJsonDiffPlugin()) +plugin_manager.register(RestockDiffPlugin()) +plugin_manager.register(ExampleProcessorPlugin()) + +# Check for test plugin and conditionally register it +try: + # This avoids circular imports + from .test_plugin_example import ExampleProcessorPlugin as TestExampleProcessorPlugin + test_plugin_instance = TestExampleProcessorPlugin() + # Only register if it has a different name than the regular example plugin + if test_plugin_instance.get_processor_name() != "example_processor": + plugin_manager.register(test_plugin_instance) +except (ImportError, AttributeError): + pass \ No newline at end of file diff --git a/changedetectionio/processors/constants.py b/changedetectionio/processors/constants.py new file mode 100644 index 00000000000..fd6ff16f62d --- /dev/null +++ b/changedetectionio/processors/constants.py @@ -0,0 +1,5 @@ +# Common constants used across processors + +# Price data tracking constants +PRICE_DATA_TRACK_ACCEPT = 'accepted' +PRICE_DATA_TRACK_REJECT = 'rejected' \ No newline at end of file diff --git a/changedetectionio/processors/example_processor_plugin.py b/changedetectionio/processors/example_processor_plugin.py new file mode 100644 index 00000000000..058fe9a1040 --- /dev/null +++ b/changedetectionio/processors/example_processor_plugin.py @@ -0,0 +1,162 @@ +""" +Example plugin to demonstrate how to create a new processor plugin +""" +from .pluggy_interface import hookimpl +import importlib + +class ExampleProcessorPlugin: + """ + Example processor plugin that extends the text_json_diff processor + """ + + @hookimpl + def get_processor_name(self): + return "example_processor" + + @hookimpl + def get_processor_description(self): + return "Example Processor Plugin - For demonstration purposes" + + @hookimpl + def get_processor_version(self): + return "0.1.0-beta" + + @hookimpl + def perform_site_check(self, datastore, watch_uuid): + watch = datastore.data['watching'].get(watch_uuid) + if watch and watch.get('processor') == 'example_processor': + # Log that we're using our special example processor + from loguru import logger + + # Check if the example mode is enabled + if watch.is_example_mode_enabled(): + # Get the threshold value for our plugin + threshold = watch.get_example_threshold() + logger.info(f"Example processor using mode: {watch.get('example_settings', {}).get('mode')} with threshold: {threshold}") + + # Check if advanced features are enabled + advanced_features = watch.get('example_settings', {}).get('example_toggle', False) + if advanced_features: + logger.info("Example processor advanced features are enabled") + else: + logger.info("Example processor is in OFF mode, using standard processing") + + # Import here to avoid circular imports + from changedetectionio.processors.text_json_diff.processor import perform_site_check + return perform_site_check(datastore=datastore, watch_uuid=watch_uuid) + return None + + @hookimpl + def get_form_class(self, processor_name): + if processor_name == 'example_processor': + # Import here to avoid circular imports + from changedetectionio import forms + from wtforms import StringField, BooleanField, TextAreaField, RadioField, FloatField + from wtforms.validators import Optional, NumberRange + from wtforms.fields.form import FormField + from wtforms.form import Form + + # Create a settings form for the example plugin + class ExampleSettingsForm(Form): + mode = RadioField(label='Example Mode', choices=[ + ('mode_a', "Mode A - Default behavior"), + ('mode_b', "Mode B - Alternative behavior"), + ('off', "Off - Disable example functionality"), + ], default="mode_a") + + threshold = FloatField('Threshold value', [ + Optional(), + NumberRange(min=0, max=100, message="Should be between 0 and 100") + ], render_kw={"placeholder": "0", "size": "5"}) + + example_toggle = BooleanField('Enable advanced features', default=False) + example_notes = TextAreaField('Notes', validators=[Optional()]) + + # Create the main form by extending the base form + class ExampleProcessorForm(forms.processor_text_json_diff_form): + example_settings = FormField(ExampleSettingsForm) + + def extra_tab_content(self): + return 'Example Plugin' + + def extra_form_content(self): + output = "" + + # Show warning if tag overrides settings (similar to restock plugin) + if getattr(self, 'watch', None) and getattr(self, 'datastore'): + for tag_uuid in self.watch.get('tags'): + tag = self.datastore.data['settings']['application']['tags'].get(tag_uuid, {}) + if tag.get('overrides_watch'): + output = f"""<p><strong>Note! A Group tag overrides the example plugin settings here.</strong></p><style>#example-fieldset-group {{ opacity: 0.6; }}</style>""" + + output += """ + {% from '_helpers.html' import render_field, render_checkbox_field, render_button %} + <script> + $(document).ready(function () { + toggleOpacity('#example_settings-example_toggle', '.example-advanced-settings', true); + }); + </script> + + <fieldset id="example-fieldset-group"> + <div class="pure-control-group"> + <fieldset class="pure-group inline-radio"> + {{ render_field(form.example_settings.mode) }} + </fieldset> + <fieldset class="pure-group"> + {{ render_checkbox_field(form.example_settings.example_toggle) }} + <span class="pure-form-message-inline">Enable advanced example features</span> + </fieldset> + <fieldset class="pure-group example-advanced-settings"> + {{ render_field(form.example_settings.threshold) }} + <span class="pure-form-message-inline">Set the threshold percentage for this example plugin</span> + <span class="pure-form-message-inline">For example, 5% means the plugin will only activate when changes exceed 5% of the content</span> + </fieldset> + <fieldset class="pure-group example-advanced-settings"> + {{ render_field(form.example_settings.example_notes, rows=3, placeholder="Add any notes here...") }} + <span class="pure-form-message-inline">Additional notes for this watch</span> + </fieldset> + </div> + </fieldset> + """ + return output + + return ExampleProcessorForm + return None + + @hookimpl + def get_watch_model_class(self, processor_name): + if processor_name == 'example_processor': + # Import here to avoid circular imports + from changedetectionio.model import Watch + + # Create a custom Watch model class for the example plugin + class ExampleWatchModel(Watch.model): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # Initialize example plugin settings if not present + if not self.get('example_settings'): + self['example_settings'] = { + 'mode': 'mode_a', + 'threshold': 0, + 'example_toggle': False, + 'example_notes': '' + } + + # Add any custom methods for the example plugin + def get_example_threshold(self): + """Get the threshold value or return the default""" + settings = self.get('example_settings', {}) + return settings.get('threshold', 0) + + def is_example_mode_enabled(self): + """Check if the example plugin is enabled""" + settings = self.get('example_settings', {}) + return settings.get('mode') != 'off' + + return ExampleWatchModel + return None + +# This function would be called by the setup.py entry_points +def register_plugin(plugin_manager): + plugin_manager.register(ExampleProcessorPlugin()) \ No newline at end of file diff --git a/changedetectionio/processors/pluggy_interface.py b/changedetectionio/processors/pluggy_interface.py new file mode 100644 index 00000000000..4ab9692a041 --- /dev/null +++ b/changedetectionio/processors/pluggy_interface.py @@ -0,0 +1,64 @@ +import pluggy + +# Ensure that the namespace in HookspecMarker matches PluginManager +PLUGIN_NAMESPACE = "changedetectionio_processors" + +hookspec = pluggy.HookspecMarker(PLUGIN_NAMESPACE) +hookimpl = pluggy.HookimplMarker(PLUGIN_NAMESPACE) + + +class ProcessorSpec: + """Hook specifications for difference detection processors.""" + + @hookspec + def get_processor_name(): + """Return the processor name for selection in the UI.""" + pass + + @hookspec + def get_processor_description(): + """Return a human-readable description of the processor.""" + pass + + @hookspec + def get_processor_version(): + """Return the processor plugin version.""" + pass + + @hookspec + def perform_site_check(datastore, watch_uuid): + """Return the processor handler class or None if not applicable. + + Each plugin should check if it's the right processor for this watch + and return None if it's not. + + Should return an instance of a class that implements: + - call_browser(preferred_proxy_id=None): Fetch the content + - run_changedetection(watch): Analyze for changes and return tuple of (changed_detected, update_obj, contents) + """ + pass + + @hookspec + def get_form_class(processor_name): + """Return the WTForms form class for the processor settings or None if not applicable. + + Each plugin should check if it's the right processor and return None if not. + """ + pass + + @hookspec + def get_watch_model_class(processor_name): + """Return a custom Watch model class if needed or None if not applicable. + + Each plugin should check if it's the right processor and return None if not. + """ + pass + +# Set up Pluggy Plugin Manager +plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE) + +# Register hookspecs +plugin_manager.add_hookspecs(ProcessorSpec) + +# Discover installed plugins from external packages (if any) +plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE) \ No newline at end of file diff --git a/changedetectionio/processors/test_plugin_example.py b/changedetectionio/processors/test_plugin_example.py new file mode 100644 index 00000000000..6a12bf7c2d4 --- /dev/null +++ b/changedetectionio/processors/test_plugin_example.py @@ -0,0 +1,46 @@ +""" +Example plugin to demonstrate how to create a new processor plugin +""" +from .pluggy_interface import hookimpl +from .text_json_diff.processor import perform_site_check as text_json_diff_perform_site_check +from changedetectionio import forms + +class ExampleProcessorPlugin: + """ + Example processor plugin that extends the text_json_diff processor + """ + + @hookimpl + def get_processor_name(self): + return "example_processor" + + @hookimpl + def get_processor_description(self): + return "Example Processor Plugin - For demonstration purposes" + + @hookimpl + def perform_site_check(self, datastore, watch_uuid): + watch = datastore.data['watching'].get(watch_uuid) + if watch and watch.get('processor') == 'example_processor': + # This processor is just a wrapper around text_json_diff for demonstration + return text_json_diff_perform_site_check(datastore=datastore, watch_uuid=watch_uuid) + return None + + @hookimpl + def get_form_class(self, processor_name): + if processor_name == 'example_processor': + # Use the default form for this example + return forms.processor_text_json_diff_form + return None + + @hookimpl + def get_watch_model_class(self, processor_name): + if processor_name == 'example_processor': + # Use the default Watch model for this example + from changedetectionio.model import Watch + return Watch.model + return None + +# This function would be called by the setup.py entry_points +def register_plugin(plugin_manager): + plugin_manager.register(ExampleProcessorPlugin()) \ No newline at end of file diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py index faeab5d2cec..5ccc8ce5f3f 100644 --- a/changedetectionio/processors/text_json_diff/processor.py +++ b/changedetectionio/processors/text_json_diff/processor.py @@ -10,7 +10,7 @@ from changedetectionio.processors import difference_detection_processor from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE from changedetectionio import html_tools, content_fetchers -from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT +from changedetectionio.processors.constants import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT from loguru import logger urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) diff --git a/changedetectionio/static/js/plugins.js b/changedetectionio/static/js/plugins.js index 1eecef6cfb6..1dea43fe139 100644 --- a/changedetectionio/static/js/plugins.js +++ b/changedetectionio/static/js/plugins.js @@ -1,4 +1,22 @@ (function ($) { + // Initialize plugin management UI when the DOM is ready + $(document).ready(function() { + // Add event handlers for plugin checkboxes + $("#plugins-table input[type='checkbox']").on('change', function() { + const isEnabled = $(this).is(':checked'); + + // For visual feedback, fade the row when disabled + if (isEnabled) { + $(this).closest('tr').removeClass('disabled-plugin'); + } else { + $(this).closest('tr').addClass('disabled-plugin'); + } + + const pluginName = $(this).closest('tr').find('td:nth-child(2)').text().trim(); + console.log(`Plugin ${pluginName} ${isEnabled ? 'enabled' : 'disabled'}`); + }); + }); + /** * debounce * @param {integer} milliseconds This param indicates the number of milliseconds diff --git a/changedetectionio/store.py b/changedetectionio/store.py index efc29275a07..9d5c70861ec 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -6,7 +6,7 @@ from .html_tools import TRANSLATE_WHITESPACE_TABLE from . model import App, Watch -from copy import deepcopy, copy +from copy import deepcopy from os import path, unlink from threading import Lock import json diff --git a/changedetectionio/templates/_helpers.html b/changedetectionio/templates/_helpers.html index 2ed75a30170..0bc2368467d 100644 --- a/changedetectionio/templates/_helpers.html +++ b/changedetectionio/templates/_helpers.html @@ -1,3 +1,7 @@ +{% macro hasattr(obj, name) -%} + {{ obj is defined and name in obj.__dict__ }} +{%- endmacro %} + {% macro render_field(field) %} <div {% if field.errors %} class="error" {% endif %}>{{ field.label }}</div> <div {% if field.errors %} class="error" {% endif %}>{{ field(**kwargs)|safe }} diff --git a/changedetectionio/templates/settings.html b/changedetectionio/templates/settings.html deleted file mode 100644 index 2e651a0104e..00000000000 --- a/changedetectionio/templates/settings.html +++ /dev/null @@ -1,310 +0,0 @@ -{% extends 'base.html' %} - -{% block content %} -{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form %} -{% from '_common_fields.html' import render_common_settings_form %} -<script> - const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}"; -{% if emailprefix %} - const email_notification_prefix=JSON.parse('{{emailprefix|tojson}}'); -{% endif %} -</script> -<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> -<script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script> -<script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script> -<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script> -<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script> -<script src="{{url_for('static_content', group='js', filename='scheduler.js')}}" defer></script> -<div class="edit-form"> - <div class="tabs collapsable"> - <ul> - <li class="tab" id=""><a href="#general">General</a></li> - <li class="tab"><a href="#notifications">Notifications</a></li> - <li class="tab"><a href="#fetching">Fetching</a></li> - <li class="tab"><a href="#filters">Global Filters</a></li> - <li class="tab"><a href="#api">API</a></li> - <li class="tab"><a href="#timedate">Time & Date</a></li> - <li class="tab"><a href="#proxies">CAPTCHA & Proxies</a></li> - </ul> - </div> - <div class="box-wrap inner"> - <form class="pure-form pure-form-stacked settings" action="{{url_for('settings.settings_page')}}" method="POST"> - <input type="hidden" name="csrf_token" value="{{ csrf_token() }}" > - <div class="tab-pane-inner" id="general"> - <fieldset> - <div class="pure-control-group"> - {{ render_field(form.requests.form.time_between_check, class="time-check-widget") }} - <span class="pure-form-message-inline">Default recheck time for all watches, current system minimum is <i>{{min_system_recheck_seconds}}</i> seconds (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Misc-system-settings#enviroment-variables">more info</a>).</span> - <div id="time-between-check-schedule"> - <!-- Start Time and End Time --> - <div id="limit-between-time"> - {{ render_time_schedule_form(form.requests, available_timezones, timezone_default_config) }} - </div> - </div> - </div> - <div class="pure-control-group"> - {{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }} - <span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span> - </div> - <div class="pure-control-group"> - {{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }} - <span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification - <br> - Set to <strong>0</strong> to disable - </span> - </div> - <div class="pure-control-group"> - {% if not hide_remove_pass %} - {% if current_user.is_authenticated %} - {{ render_button(form.application.form.removepassword_button) }} - {% else %} - {{ render_field(form.application.form.password) }} - <span class="pure-form-message-inline">Password protection for your changedetection.io application.</span> - {% endif %} - {% else %} - <span class="pure-form-message-inline">Password is locked.</span> - {% endif %} - </div> - - <div class="pure-control-group"> - {{ render_checkbox_field(form.application.form.shared_diff_access, class="shared_diff_access") }} - <span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page) - </span> - </div> - <div class="pure-control-group"> - {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }} - </div> - <div class="pure-control-group"> - {{ render_field(form.application.form.pager_size) }} - <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span> - </div> - - <div class="pure-control-group"> - {{ render_checkbox_field(form.application.form.extract_title_as_title) }} - <span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span> - </div> - <div class="pure-control-group"> - {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }} - <span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span> - </div> - {% if form.requests.proxy %} - <div class="pure-control-group inline-radio"> - {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }} - <span class="pure-form-message-inline"> - Choose a default proxy for all watches - </span> - </div> - {% endif %} - </fieldset> - </div> - - <div class="tab-pane-inner" id="notifications"> - <fieldset> - <div class="field-group"> - {{ render_common_settings_form(form.application.form, emailprefix, settings_application, extra_notification_token_placeholder_info) }} - </div> - </fieldset> - <div class="pure-control-group" id="notification-base-url"> - {{ render_field(form.application.form.base_url, class="m-d") }} - <span class="pure-form-message-inline"> - Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notification links.<br> - Default value is the system environment variable '<code>BASE_URL</code>' - <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>. - </span> - </div> - </div> - - <div class="tab-pane-inner" id="fetching"> - <div class="pure-control-group inline-radio"> - {{ render_field(form.application.form.fetch_backend, class="fetch-backend") }} - <span class="pure-form-message-inline"> - <p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p> - <p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p> - </span> - </div> - <fieldset class="pure-group" id="webdriver-override-options" data-visible-for="application-fetch_backend=html_webdriver"> - <div class="pure-form-message-inline"> - <strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong> - <br> - This will wait <i>n</i> seconds before extracting the text. - </div> - <div class="pure-control-group"> - {{ render_field(form.application.form.webdriver_delay) }} - </div> - </fieldset> - <div class="pure-control-group inline-radio"> - {{ render_field(form.requests.form.default_ua) }} - <span class="pure-form-message-inline"> - Applied to all requests.<br><br> - Note: Simply changing the User-Agent often does not defeat anti-robot technologies, it's important to consider <a href="https://changedetection.io/tutorial/what-are-main-types-anti-robot-mechanisms">all of the ways that the browser is detected</a>. - </span> - </div> - <div class="pure-control-group"> - <br> - Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using Bright Data and Oxylabs Proxies, find out more here.</a> - - </div> - </div> - - <div class="tab-pane-inner" id="filters"> - - <fieldset class="pure-group"> - {{ render_checkbox_field(form.application.form.ignore_whitespace) }} - <span class="pure-form-message-inline">Ignore whitespace, tabs and new-lines/line-feeds when considering if a change was detected.<br> - <i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc. - </span> - </fieldset> - <fieldset class="pure-group"> - {{ render_checkbox_field(form.application.form.render_anchor_tag_content) }} - <span class="pure-form-message-inline">Render anchor tag content, default disabled, when enabled renders links as <code>(link text)[https://somesite.com]</code> - <br> - <i>Note:</i> Changing this could affect the content of your existing watches, possibly trigger alerts etc. - </span> - </fieldset> - <fieldset class="pure-group"> - {{ render_field(form.application.form.global_subtractive_selectors, rows=5, placeholder="header -footer -nav -.stockticker -//*[contains(text(), 'Advertisement')]") }} - <span class="pure-form-message-inline"> - <ul> - <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li> - <li> Don't paste HTML here, use only CSS and XPath selectors </li> - <li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li> - </ul> - </span> - </fieldset> - <fieldset class="pure-group"> - {{ render_field(form.application.form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line -/some.regex\d{2}/ for case-INsensitive regex - ") }} - <span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br> - <span class="pure-form-message-inline"> - <ul> - <li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li> - <li>Note: This is applied globally in addition to the per-watch rules.</li> - <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li> - <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li> - <li>Changing this will affect the comparison checksum which may trigger an alert</li> - </ul> - </span> - </fieldset> - </div> - - <div class="tab-pane-inner" id="api"> - <h4>API Access</h4> - <p>Drive your changedetection.io via API, More about <a href="https://github.com/dgtlmoon/changedetection.io/wiki/API-Reference">API access here</a></p> - - <div class="pure-control-group"> - {{ render_checkbox_field(form.application.form.api_access_token_enabled) }} - <div class="pure-form-message-inline">Restrict API access limit by using <code>x-api-key</code> header - required for the Chrome Extension to work</div><br> - <div class="pure-form-message-inline"><br>API Key <span id="api-key">{{api_key}}</span> - <span style="display:none;" id="api-key-copy" >copy</span> - </div> - </div> - <div class="pure-control-group"> - <a href="{{url_for('settings.settings_reset_api_key')}}" class="pure-button button-small button-cancel">Regenerate API key</a> - </div> - <div class="pure-control-group"> - <h4>Chrome Extension</h4> - <p>Easily add any web-page to your changedetection.io installation from within Chrome.</p> - <strong>Step 1</strong> Install the extension, <strong>Step 2</strong> Navigate to this page, - <strong>Step 3</strong> Open the extension from the toolbar and click "<i>Sync API Access</i>" - <p> - <a id="chrome-extension-link" - title="Try our new Chrome Extension!" - href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop"> - <img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}" alt="Chrome"> - Chrome Webstore - </a> - </p> - </div> - </div> - <div class="tab-pane-inner" id="timedate"> - <div class="pure-control-group"> - Ensure the settings below are correct, they are used to manage the time schedule for checking your web page watches. - </div> - <div class="pure-control-group"> - <p><strong>UTC Time & Date from Server:</strong> <span id="utc-time" >{{ utc_time }}</span></p> - <p><strong>Local Time & Date in Browser:</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p> - <p> - {{ render_field(form.application.form.timezone) }} - <datalist id="timezones" style="display: none;"> - {% for tz_name in available_timezones %} - <option value="{{ tz_name }}">{{ tz_name }}</option> - {% endfor %} - </datalist> - </p> - </div> - </div> - <div class="tab-pane-inner" id="proxies"> - <div id="recommended-proxy"> - <div> - <img style="height: 2em;" src="{{url_for('static_content', group='images', filename='brightdata.svg')}}" alt="BrightData Proxy Provider"> - <p>BrightData offer world-class proxy services, "Data Center" proxies are a very affordable way to proxy your requests, whilst <strong><a href="https://brightdata.grsm.io/n0r16zf7eivq">WebUnlocker</a></strong> can help solve most CAPTCHAs.</p> - <p> - BrightData offer many <a href="https://brightdata.com/proxy-types" target="new">many different types of proxies</a>, it is worth reading about what is best for your use-case. - </p> - - <p> - When you have <a href="https://brightdata.grsm.io/n0r16zf7eivq">registered</a>, enabled the required services, visit the <A href="https://brightdata.com/cp/api_example?">API example page</A>, then select <strong>Python</strong>, set the country you wish to use, then copy+paste the access Proxy URL into the "Extra Proxies" boxes below.<br> - </p> - <p> - The Proxy URL with BrightData should start with <code>http://brd-customer...</code> - </p> - <p>When you sign up using <a href="https://brightdata.grsm.io/n0r16zf7eivq">https://brightdata.grsm.io/n0r16zf7eivq</a> BrightData will match any first deposit up to $150</p> - </div> - <div> - <img style="height: 2em;" - src="{{url_for('static_content', group='images', filename='oxylabs.svg')}}" - alt="Oxylabs Proxy Provider"> - <p> - Collect public data at scale with industry-leading web scraping solutions and the world’s - largest ethical proxy network. - </p> - <p> - Oxylabs also provide a <a href="https://oxylabs.io/products/web-unblocker"><strong>WebUnlocker</strong></a> - proxy that bypasses sophisticated anti-bot systems, so you don’t have to.<br> - </p> - <p> - Serve over <a href="https://oxylabs.io/location-proxy">195 countries</a>, providing <a - href="https://oxylabs.io/products/residential-proxy-pool">Residential</a>, <a - href="https://oxylabs.io/products/mobile-proxies">Mobile</a> and <a - href="https://oxylabs.io/products/rotating-isp-proxies">ISP proxies</a> and much more. - </p> - <p> - Use the promo code <strong>boost35</strong> with this link <a href="https://oxylabs.go2cloud.org/SH2d">https://oxylabs.go2cloud.org/SH2d</a> for 35% off Residential, Mobile proxies, Web Unblocker, and Scraper APIs. Built-in proxies enable you to access data from all around the world and help overcome anti-bot solutions. - - </p> - - - </div> - </div> - - <p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites. - - <div class="pure-control-group" id="extra-proxies-setting"> - {{ render_field(form.requests.form.extra_proxies) }} - <span class="pure-form-message-inline">"Name" will be used for selecting the proxy in the Watch Edit settings</span><br> - <span class="pure-form-message-inline">SOCKS5 proxies with authentication are only supported with 'plain requests' fetcher, for other fetchers you should whitelist the IP access instead</span> - </div> - <div class="pure-control-group" id="extra-browsers-setting"> - <p> - <span class="pure-form-message-inline"><i>Extra Browsers</i> can be attached to further defeat CAPTCHA's on websites that are particularly hard to scrape.</span><br> - <span class="pure-form-message-inline">Simply paste the connection address into the box, <a href="https://changedetection.io/tutorial/using-bright-datas-scraping-browser-pass-captchas-and-other-protection-when-monitoring">More instructions and examples here</a> </span> - </p> - {{ render_field(form.requests.form.extra_browsers) }} - </div> - </div> - <div id="actions"> - <div class="pure-control-group"> - {{ render_button(form.save_button) }} - <a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a> - <a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a> - </div> - </div> - </form> - </div> -</div> - -{% endblock %} diff --git a/changedetectionio/tests/test_processor_plugins.py b/changedetectionio/tests/test_processor_plugins.py new file mode 100644 index 00000000000..7b66ff6ce79 --- /dev/null +++ b/changedetectionio/tests/test_processor_plugins.py @@ -0,0 +1,120 @@ +import pytest +from time import sleep +from copy import deepcopy +from ..processors import pluggy_interface +from ..processors.pluggy_interface import PLUGIN_NAMESPACE +from ..processors import get_all_plugins_info, available_processors, get_form_class_for_processor +from ..processors.text_json_diff.processor import perform_site_check + +def test_plugin_interfaces(): + """Test that the plugin interface is functioning correctly""" + # The plugin manager should be already set up + assert pluggy_interface.plugin_manager is not None + assert pluggy_interface.plugin_manager.get_namespace() == PLUGIN_NAMESPACE + + # Check that we can get plugins + plugins = pluggy_interface.plugin_manager.get_plugins() + assert len(plugins) >= 3 # Should have at least the 3 built-in plugins + + # Check that the TextJsonDiffPlugin is registered + for plugin in plugins: + if hasattr(plugin, "get_processor_name") and plugin.get_processor_name() == "text_json_diff": + assert plugin.get_processor_description() is not None + assert plugin.get_processor_version() is not None + break + else: + assert False, "TextJsonDiffPlugin not found" + + # Check plugin info collection + plugin_info = get_all_plugins_info() + assert len(plugin_info) >= 3 + + # Check processor list generation + processor_list = available_processors() + assert len(processor_list) >= 3 + + # Ensure each processor has a name and description + for name, description in processor_list: + assert name is not None + assert description is not None + +def test_plugin_form_and_model_handling(): + """Test that plugin form and model handling works""" + # Test getting the form class for text_json_diff + form_class = get_form_class_for_processor("text_json_diff") + assert form_class is not None + + # Test getting the form class for a non-existent processor + form_class = get_form_class_for_processor("non_existent_processor") + assert form_class is not None # Should return the default text_json_diff form + +def test_plugin_enabled_filters(client, live_server): + """Test that enabled plugins filter works""" + # Create a fake datastore with plugin settings and tracking for writes + datastore = type('MockDatastore', (object,), { + 'data': { + 'settings': { + 'application': { + 'enabled_plugins': { + 'text_json_diff': True, + 'restock_diff': False, + 'example_processor': True + } + } + } + }, + 'needs_write': False + }) + + # Get processors filtered by enabled status + processor_list = available_processors(datastore) + + # Should have text_json_diff and example_processor, but not restock_diff + processor_names = [name for name, desc in processor_list] + assert 'text_json_diff' in processor_names + assert 'example_processor' in processor_names + assert 'restock_diff' not in processor_names + + # Test with empty enabled_plugins (should auto-populate with defaults) + datastore.data['settings']['application']['enabled_plugins'] = {} + processor_list = available_processors(datastore) + + # Check that it detected and auto-populated missing plugins + assert len(datastore.data['settings']['application']['enabled_plugins']) >= 3 + assert datastore.needs_write == True + + # Built-in processors should be enabled by default + assert datastore.data['settings']['application']['enabled_plugins']['text_json_diff'] == True + assert datastore.data['settings']['application']['enabled_plugins']['restock_diff'] == True + + # Third-party processors should be disabled by default + assert datastore.data['settings']['application']['enabled_plugins']['example_processor'] == False + + # Only enabled processors should be in the list + processor_names = [name for name, desc in processor_list] + assert 'text_json_diff' in processor_names + assert 'restock_diff' in processor_names + assert 'example_processor' not in processor_names + +def test_plugin_example_implementation(): + """Test the example plugin implementation""" + from ..processors.example_processor_plugin import ExampleProcessorPlugin + + plugin = ExampleProcessorPlugin() + assert plugin.get_processor_name() == "example_processor" + assert "Example Processor Plugin" in plugin.get_processor_description() + assert plugin.get_processor_version() is not None + + # Test the form class + form_class = plugin.get_form_class(processor_name="example_processor") + assert form_class is not None + assert hasattr(form_class, "example_settings") + + # Test the model class + model_class = plugin.get_watch_model_class(processor_name="example_processor") + assert model_class is not None + + # Create an instance of the model and check its methods + model_instance = model_class() + assert hasattr(model_instance, "get_example_threshold") + assert hasattr(model_instance, "is_example_mode_enabled") \ No newline at end of file diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index 28647bada95..2845b7a5e50 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -270,20 +270,16 @@ def run(self): logger.info(f"Processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}") try: + # Get processor handler from pluggy plugin system + from changedetectionio.processors import get_processor_handler + # Processor is what we are using for detecting the "Change" - processor = watch.get('processor', 'text_json_diff') - - # Init a new 'difference_detection_processor', first look in processors - processor_module_name = f"changedetectionio.processors.{processor}.processor" - try: - processor_module = importlib.import_module(processor_module_name) - except ModuleNotFoundError as e: - print(f"Processor module '{processor}' not found.") - raise e - - update_handler = processor_module.perform_site_check(datastore=self.datastore, - watch_uuid=uuid - ) + processor_name = watch.get('processor', 'text_json_diff') + + # Get the handler via the plugin system + update_handler = get_processor_handler(processor_name=processor_name, + datastore=self.datastore, + watch_uuid=uuid) update_handler.call_browser() From 673ec24fa3a302c419262b103286c0df7d8fcfec Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Wed, 19 Mar 2025 15:52:31 +0100 Subject: [PATCH 02/21] More work on plugins --- .../settings/templates/settings.html | 27 +++++++++++++++++++ changedetectionio/processors/__init__.py | 6 +++-- .../processors/example_processor_plugin.py | 10 +++++-- changedetectionio/tests/test_plugins.py | 15 +++++++++++ 4 files changed, 54 insertions(+), 4 deletions(-) create mode 100644 changedetectionio/tests/test_plugins.py diff --git a/changedetectionio/blueprint/settings/templates/settings.html b/changedetectionio/blueprint/settings/templates/settings.html index cc79f0fd6c9..cb9e09e36bf 100644 --- a/changedetectionio/blueprint/settings/templates/settings.html +++ b/changedetectionio/blueprint/settings/templates/settings.html @@ -26,6 +26,7 @@ <li class="tab"><a href="#api">API</a></li> <li class="tab"><a href="#timedate">Time & Date</a></li> <li class="tab"><a href="#proxies">CAPTCHA & Proxies</a></li> + <li class="tab"><a href="#plugins">Plugins</a></li> </ul> </div> <div class="box-wrap inner"> @@ -297,6 +298,32 @@ <h4>Chrome Extension</h4> {{ render_field(form.requests.form.extra_browsers) }} </div> </div> + <div class="tab-pane-inner" id="plugins"> + <div class="pure-control-group"> + <h4>Registered Plugins</h4> + <p>The following plugins are currently registered in the system - <a href="https://changedetection.io/plugins">Get more plugins here</a></p> + + <table class="pure-table pure-table-striped"> + <thead> + <tr> + <th>Name</th> + <th>Description</th> + <th>Version</th> + </tr> + </thead> + <tbody> + {% for plugin in plugins_info %} + <tr> + <td>{{ plugin.name }}</td> + <td>{{ plugin.description }}</td> + <td>{{ plugin.version }}</td> + </tr> + {% endfor %} + </tbody> + </table> + </div> + </div> + <div id="actions"> <div class="pure-control-group"> {{ render_button(form.save_button) }} diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py index 299a2ebe44c..e7413c1bb9e 100644 --- a/changedetectionio/processors/__init__.py +++ b/changedetectionio/processors/__init__.py @@ -330,7 +330,8 @@ def get_processor_description(self): @hookimpl def get_processor_version(self): - return "1.0.0" + from changedetectionio import __version__ + return __version__ @hookimpl def perform_site_check(self, datastore, watch_uuid): @@ -366,7 +367,8 @@ def get_processor_description(self): @hookimpl def get_processor_version(self): - return "1.0.0" + from changedetectionio import __version__ + return __version__ @hookimpl def perform_site_check(self, datastore, watch_uuid): diff --git a/changedetectionio/processors/example_processor_plugin.py b/changedetectionio/processors/example_processor_plugin.py index 058fe9a1040..37aab8f31d5 100644 --- a/changedetectionio/processors/example_processor_plugin.py +++ b/changedetectionio/processors/example_processor_plugin.py @@ -1,14 +1,20 @@ """ Example plugin to demonstrate how to create a new processor plugin """ + from .pluggy_interface import hookimpl -import importlib class ExampleProcessorPlugin: """ Example processor plugin that extends the text_json_diff processor """ - + + def random_string(self, length=50): + import random + import string + + return ''.join(random.choices(string.ascii_letters + string.digits, k=length)) + @hookimpl def get_processor_name(self): return "example_processor" diff --git a/changedetectionio/tests/test_plugins.py b/changedetectionio/tests/test_plugins.py new file mode 100644 index 00000000000..34dc70c1080 --- /dev/null +++ b/changedetectionio/tests/test_plugins.py @@ -0,0 +1,15 @@ +from flask import url_for + +from changedetectionio.tests.util import live_server_setup + + +def test_checkplugins_registered(live_server, client): + live_server_setup(live_server) + res = client.get( + url_for("settings.settings_page") + ) + assert res.status_code == 200 + # Should be registered in the info table + assert b'<td>Webpage Text/HTML, JSON and PDF changes' in res.data + assert b'<td>text_json_diff' in res.data + From a262f373ccc147088565c9dd2f153e01f9eb8abd Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Wed, 19 Mar 2025 17:43:16 +0100 Subject: [PATCH 03/21] Remove hard coded exmaples stuff --- changedetectionio/processors/__init__.py | 14 -- .../processors/example_processor_plugin.py | 168 ------------------ .../processors/pluggy_interface.py | 9 +- .../processors/test_plugin_example.py | 46 ----- 4 files changed, 7 insertions(+), 230 deletions(-) delete mode 100644 changedetectionio/processors/example_processor_plugin.py delete mode 100644 changedetectionio/processors/test_plugin_example.py diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py index e7413c1bb9e..5b5a4b5df91 100644 --- a/changedetectionio/processors/__init__.py +++ b/changedetectionio/processors/__init__.py @@ -396,8 +396,6 @@ def get_watch_model_class(self, processor_name): return Watch.model return None -# Import our example plugins -from .example_processor_plugin import ExampleProcessorPlugin # For backward compatibility def get_custom_watch_obj_for_processor(processor_name): @@ -406,15 +404,3 @@ def get_custom_watch_obj_for_processor(processor_name): # Register the built-in processor plugins plugin_manager.register(TextJsonDiffPlugin()) plugin_manager.register(RestockDiffPlugin()) -plugin_manager.register(ExampleProcessorPlugin()) - -# Check for test plugin and conditionally register it -try: - # This avoids circular imports - from .test_plugin_example import ExampleProcessorPlugin as TestExampleProcessorPlugin - test_plugin_instance = TestExampleProcessorPlugin() - # Only register if it has a different name than the regular example plugin - if test_plugin_instance.get_processor_name() != "example_processor": - plugin_manager.register(test_plugin_instance) -except (ImportError, AttributeError): - pass \ No newline at end of file diff --git a/changedetectionio/processors/example_processor_plugin.py b/changedetectionio/processors/example_processor_plugin.py deleted file mode 100644 index 37aab8f31d5..00000000000 --- a/changedetectionio/processors/example_processor_plugin.py +++ /dev/null @@ -1,168 +0,0 @@ -""" -Example plugin to demonstrate how to create a new processor plugin -""" - -from .pluggy_interface import hookimpl - -class ExampleProcessorPlugin: - """ - Example processor plugin that extends the text_json_diff processor - """ - - def random_string(self, length=50): - import random - import string - - return ''.join(random.choices(string.ascii_letters + string.digits, k=length)) - - @hookimpl - def get_processor_name(self): - return "example_processor" - - @hookimpl - def get_processor_description(self): - return "Example Processor Plugin - For demonstration purposes" - - @hookimpl - def get_processor_version(self): - return "0.1.0-beta" - - @hookimpl - def perform_site_check(self, datastore, watch_uuid): - watch = datastore.data['watching'].get(watch_uuid) - if watch and watch.get('processor') == 'example_processor': - # Log that we're using our special example processor - from loguru import logger - - # Check if the example mode is enabled - if watch.is_example_mode_enabled(): - # Get the threshold value for our plugin - threshold = watch.get_example_threshold() - logger.info(f"Example processor using mode: {watch.get('example_settings', {}).get('mode')} with threshold: {threshold}") - - # Check if advanced features are enabled - advanced_features = watch.get('example_settings', {}).get('example_toggle', False) - if advanced_features: - logger.info("Example processor advanced features are enabled") - else: - logger.info("Example processor is in OFF mode, using standard processing") - - # Import here to avoid circular imports - from changedetectionio.processors.text_json_diff.processor import perform_site_check - return perform_site_check(datastore=datastore, watch_uuid=watch_uuid) - return None - - @hookimpl - def get_form_class(self, processor_name): - if processor_name == 'example_processor': - # Import here to avoid circular imports - from changedetectionio import forms - from wtforms import StringField, BooleanField, TextAreaField, RadioField, FloatField - from wtforms.validators import Optional, NumberRange - from wtforms.fields.form import FormField - from wtforms.form import Form - - # Create a settings form for the example plugin - class ExampleSettingsForm(Form): - mode = RadioField(label='Example Mode', choices=[ - ('mode_a', "Mode A - Default behavior"), - ('mode_b', "Mode B - Alternative behavior"), - ('off', "Off - Disable example functionality"), - ], default="mode_a") - - threshold = FloatField('Threshold value', [ - Optional(), - NumberRange(min=0, max=100, message="Should be between 0 and 100") - ], render_kw={"placeholder": "0", "size": "5"}) - - example_toggle = BooleanField('Enable advanced features', default=False) - example_notes = TextAreaField('Notes', validators=[Optional()]) - - # Create the main form by extending the base form - class ExampleProcessorForm(forms.processor_text_json_diff_form): - example_settings = FormField(ExampleSettingsForm) - - def extra_tab_content(self): - return 'Example Plugin' - - def extra_form_content(self): - output = "" - - # Show warning if tag overrides settings (similar to restock plugin) - if getattr(self, 'watch', None) and getattr(self, 'datastore'): - for tag_uuid in self.watch.get('tags'): - tag = self.datastore.data['settings']['application']['tags'].get(tag_uuid, {}) - if tag.get('overrides_watch'): - output = f"""<p><strong>Note! A Group tag overrides the example plugin settings here.</strong></p><style>#example-fieldset-group {{ opacity: 0.6; }}</style>""" - - output += """ - {% from '_helpers.html' import render_field, render_checkbox_field, render_button %} - <script> - $(document).ready(function () { - toggleOpacity('#example_settings-example_toggle', '.example-advanced-settings', true); - }); - </script> - - <fieldset id="example-fieldset-group"> - <div class="pure-control-group"> - <fieldset class="pure-group inline-radio"> - {{ render_field(form.example_settings.mode) }} - </fieldset> - <fieldset class="pure-group"> - {{ render_checkbox_field(form.example_settings.example_toggle) }} - <span class="pure-form-message-inline">Enable advanced example features</span> - </fieldset> - <fieldset class="pure-group example-advanced-settings"> - {{ render_field(form.example_settings.threshold) }} - <span class="pure-form-message-inline">Set the threshold percentage for this example plugin</span> - <span class="pure-form-message-inline">For example, 5% means the plugin will only activate when changes exceed 5% of the content</span> - </fieldset> - <fieldset class="pure-group example-advanced-settings"> - {{ render_field(form.example_settings.example_notes, rows=3, placeholder="Add any notes here...") }} - <span class="pure-form-message-inline">Additional notes for this watch</span> - </fieldset> - </div> - </fieldset> - """ - return output - - return ExampleProcessorForm - return None - - @hookimpl - def get_watch_model_class(self, processor_name): - if processor_name == 'example_processor': - # Import here to avoid circular imports - from changedetectionio.model import Watch - - # Create a custom Watch model class for the example plugin - class ExampleWatchModel(Watch.model): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - # Initialize example plugin settings if not present - if not self.get('example_settings'): - self['example_settings'] = { - 'mode': 'mode_a', - 'threshold': 0, - 'example_toggle': False, - 'example_notes': '' - } - - # Add any custom methods for the example plugin - def get_example_threshold(self): - """Get the threshold value or return the default""" - settings = self.get('example_settings', {}) - return settings.get('threshold', 0) - - def is_example_mode_enabled(self): - """Check if the example plugin is enabled""" - settings = self.get('example_settings', {}) - return settings.get('mode') != 'off' - - return ExampleWatchModel - return None - -# This function would be called by the setup.py entry_points -def register_plugin(plugin_manager): - plugin_manager.register(ExampleProcessorPlugin()) \ No newline at end of file diff --git a/changedetectionio/processors/pluggy_interface.py b/changedetectionio/processors/pluggy_interface.py index 4ab9692a041..350821d362e 100644 --- a/changedetectionio/processors/pluggy_interface.py +++ b/changedetectionio/processors/pluggy_interface.py @@ -1,4 +1,5 @@ import pluggy +from loguru import logger # Ensure that the namespace in HookspecMarker matches PluginManager PLUGIN_NAMESPACE = "changedetectionio_processors" @@ -60,5 +61,9 @@ def get_watch_model_class(processor_name): # Register hookspecs plugin_manager.add_hookspecs(ProcessorSpec) -# Discover installed plugins from external packages (if any) -plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE) \ No newline at end of file +try: + # Discover installed plugins from external packages (if any) + plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE) + logger.info(f"Loaded plugins: {plugin_manager.get_plugins()}") +except Exception as e: + logger.critical(f"Error loading plugins: {str(e)}") \ No newline at end of file diff --git a/changedetectionio/processors/test_plugin_example.py b/changedetectionio/processors/test_plugin_example.py deleted file mode 100644 index 6a12bf7c2d4..00000000000 --- a/changedetectionio/processors/test_plugin_example.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -Example plugin to demonstrate how to create a new processor plugin -""" -from .pluggy_interface import hookimpl -from .text_json_diff.processor import perform_site_check as text_json_diff_perform_site_check -from changedetectionio import forms - -class ExampleProcessorPlugin: - """ - Example processor plugin that extends the text_json_diff processor - """ - - @hookimpl - def get_processor_name(self): - return "example_processor" - - @hookimpl - def get_processor_description(self): - return "Example Processor Plugin - For demonstration purposes" - - @hookimpl - def perform_site_check(self, datastore, watch_uuid): - watch = datastore.data['watching'].get(watch_uuid) - if watch and watch.get('processor') == 'example_processor': - # This processor is just a wrapper around text_json_diff for demonstration - return text_json_diff_perform_site_check(datastore=datastore, watch_uuid=watch_uuid) - return None - - @hookimpl - def get_form_class(self, processor_name): - if processor_name == 'example_processor': - # Use the default form for this example - return forms.processor_text_json_diff_form - return None - - @hookimpl - def get_watch_model_class(self, processor_name): - if processor_name == 'example_processor': - # Use the default Watch model for this example - from changedetectionio.model import Watch - return Watch.model - return None - -# This function would be called by the setup.py entry_points -def register_plugin(plugin_manager): - plugin_manager.register(ExampleProcessorPlugin()) \ No newline at end of file From b9a068b05059b81a2439c03426d9a0f9960c3a1b Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Wed, 19 Mar 2025 17:49:13 +0100 Subject: [PATCH 04/21] Small type check --- changedetectionio/update_worker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index 2845b7a5e50..052e449a7cd 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -531,10 +531,10 @@ def run(self): # Also save the snapshot on the first time checked, "last checked" will always be updated, so we just check history length. if changed_detected or not watch.history_n: - if update_handler.screenshot: + if hasattr(update_handler, "screenshot") and update_handler.screenshot: watch.save_screenshot(screenshot=update_handler.screenshot) - if update_handler.xpath_data: + if hasattr(update_handler, "xpath_data") and update_handler.xpath_data: watch.save_xpath_data(data=update_handler.xpath_data) # Small hack so that we sleep just enough to allow 1 second between history snapshots From eff6c1cdd32f7f3ee015f4196942352c37666ec1 Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Wed, 19 Mar 2025 17:55:14 +0100 Subject: [PATCH 05/21] Remove enabled pluginsd --- changedetectionio/blueprint/settings/__init__.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/changedetectionio/blueprint/settings/__init__.py b/changedetectionio/blueprint/settings/__init__.py index 2614794ad69..aa47fc1800e 100644 --- a/changedetectionio/blueprint/settings/__init__.py +++ b/changedetectionio/blueprint/settings/__init__.py @@ -109,18 +109,7 @@ def settings_page(): datastore.data['settings']['application'].update(app_update) datastore.data['settings']['requests'].update(form.data['requests']) - - # Update plugin settings from the dynamically created fields - enabled_plugins = {} - if hasattr(form, 'plugins'): - for field_name, field in form.plugins._fields.items(): - if field_name.startswith('plugin_'): - plugin_name = field_name.replace('plugin_', '') - enabled_plugins[plugin_name] = field.data - - # Update the datastore with plugin settings - datastore.data['settings']['application']['enabled_plugins'] = enabled_plugins - + datastore.needs_write_urgent = True flash("Settings updated.") From 80c05516f7738cfe845e83485d4c8cf7c001c091 Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Wed, 19 Mar 2025 17:55:45 +0100 Subject: [PATCH 06/21] remove 'enabled plugins' --- changedetectionio/forms.py | 16 ++-------- changedetectionio/model/App.py | 3 +- changedetectionio/processors/__init__.py | 37 ++---------------------- 3 files changed, 6 insertions(+), 50 deletions(-) diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index 3d204c117e1..59eea0fea47 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -514,11 +514,8 @@ class quickWatchForm(Form): def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs): super().__init__(formdata, obj, prefix, data, meta, **kwargs) # Set processor choices based on datastore if available - datastore = kwargs.get('datastore') - if datastore: - self.processor.choices = self.processors.available_processors(datastore) - else: - self.processor.choices = self.processors.available_processors() + #datastore = kwargs.get('datastore') + self.processor.choices = self.processors.available_processors() @@ -775,15 +772,6 @@ class globalSettingsApplicationForm(commonSettingsForm): validators=[validators.NumberRange(min=0, message="Should contain zero or more attempts")]) - # Create plugins form and add it as an attribute -# plugin_form = PluginsManagementForm( -# formdata=formdata, -# plugins_info=plugins_info, -# enabled_plugins=enabled_plugins -# ) - - - class globalSettingsForm(Form): # Define these as FormFields/"sub forms", this way it matches the JSON storage # datastore.data['settings']['application'].. diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index b4a55fd768a..17038434ab9 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -53,8 +53,7 @@ class model(dict): 'shared_diff_access': False, 'webdriver_delay': None , # Extra delay in seconds before extracting text 'tags': {}, #@todo use Tag.model initialisers - 'timezone': None, # Default IANA timezone name - 'enabled_plugins': {} # Dictionary of plugin names and their enabled status + 'timezone': None # Default IANA timezone name } } } diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py index 5b5a4b5df91..a1d55f9ead8 100644 --- a/changedetectionio/processors/__init__.py +++ b/changedetectionio/processors/__init__.py @@ -218,40 +218,9 @@ def available_processors(datastore=None): """ plugins_info = get_all_plugins_info() processor_list = [] - - # If datastore is provided, filter by enabled_plugins - if datastore: - # Make sure enabled_plugins exists in datastore - if 'enabled_plugins' not in datastore.data['settings']['application']: - datastore.data['settings']['application']['enabled_plugins'] = {} - - enabled_plugins = datastore.data['settings']['application']['enabled_plugins'] - - # Scan for any new plugins that aren't in the enabled_plugins dict yet - # Default built-in processors to enabled, third-party to disabled - plugins_updated = False - for plugin in plugins_info: - if plugin["name"] not in enabled_plugins: - # Built-in processors are enabled by default - if plugin["name"] in ["text_json_diff", "restock_diff"]: - enabled_plugins[plugin["name"]] = True - else: - # Third-party plugins are disabled by default - enabled_plugins[plugin["name"]] = False - plugins_updated = True - - # Save changes if we added new plugins - if plugins_updated: - datastore.needs_write = True - - # Only include enabled plugins - for plugin in plugins_info: - if enabled_plugins.get(plugin["name"], False): - processor_list.append((plugin["name"], plugin["description"])) - else: - # No datastore provided, include all plugins - for plugin in plugins_info: - processor_list.append((plugin["name"], plugin["description"])) + + for plugin in plugins_info: + processor_list.append((plugin["name"], plugin["description"])) return processor_list From 80ed6cbfc59c12ddca4d0322829ced3a58964cb5 Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Wed, 19 Mar 2025 17:56:12 +0100 Subject: [PATCH 07/21] not needed --- .../tests/test_processor_plugins.py | 120 ------------------ 1 file changed, 120 deletions(-) delete mode 100644 changedetectionio/tests/test_processor_plugins.py diff --git a/changedetectionio/tests/test_processor_plugins.py b/changedetectionio/tests/test_processor_plugins.py deleted file mode 100644 index 7b66ff6ce79..00000000000 --- a/changedetectionio/tests/test_processor_plugins.py +++ /dev/null @@ -1,120 +0,0 @@ -import pytest -from time import sleep -from copy import deepcopy -from ..processors import pluggy_interface -from ..processors.pluggy_interface import PLUGIN_NAMESPACE -from ..processors import get_all_plugins_info, available_processors, get_form_class_for_processor -from ..processors.text_json_diff.processor import perform_site_check - -def test_plugin_interfaces(): - """Test that the plugin interface is functioning correctly""" - # The plugin manager should be already set up - assert pluggy_interface.plugin_manager is not None - assert pluggy_interface.plugin_manager.get_namespace() == PLUGIN_NAMESPACE - - # Check that we can get plugins - plugins = pluggy_interface.plugin_manager.get_plugins() - assert len(plugins) >= 3 # Should have at least the 3 built-in plugins - - # Check that the TextJsonDiffPlugin is registered - for plugin in plugins: - if hasattr(plugin, "get_processor_name") and plugin.get_processor_name() == "text_json_diff": - assert plugin.get_processor_description() is not None - assert plugin.get_processor_version() is not None - break - else: - assert False, "TextJsonDiffPlugin not found" - - # Check plugin info collection - plugin_info = get_all_plugins_info() - assert len(plugin_info) >= 3 - - # Check processor list generation - processor_list = available_processors() - assert len(processor_list) >= 3 - - # Ensure each processor has a name and description - for name, description in processor_list: - assert name is not None - assert description is not None - -def test_plugin_form_and_model_handling(): - """Test that plugin form and model handling works""" - # Test getting the form class for text_json_diff - form_class = get_form_class_for_processor("text_json_diff") - assert form_class is not None - - # Test getting the form class for a non-existent processor - form_class = get_form_class_for_processor("non_existent_processor") - assert form_class is not None # Should return the default text_json_diff form - -def test_plugin_enabled_filters(client, live_server): - """Test that enabled plugins filter works""" - # Create a fake datastore with plugin settings and tracking for writes - datastore = type('MockDatastore', (object,), { - 'data': { - 'settings': { - 'application': { - 'enabled_plugins': { - 'text_json_diff': True, - 'restock_diff': False, - 'example_processor': True - } - } - } - }, - 'needs_write': False - }) - - # Get processors filtered by enabled status - processor_list = available_processors(datastore) - - # Should have text_json_diff and example_processor, but not restock_diff - processor_names = [name for name, desc in processor_list] - assert 'text_json_diff' in processor_names - assert 'example_processor' in processor_names - assert 'restock_diff' not in processor_names - - # Test with empty enabled_plugins (should auto-populate with defaults) - datastore.data['settings']['application']['enabled_plugins'] = {} - processor_list = available_processors(datastore) - - # Check that it detected and auto-populated missing plugins - assert len(datastore.data['settings']['application']['enabled_plugins']) >= 3 - assert datastore.needs_write == True - - # Built-in processors should be enabled by default - assert datastore.data['settings']['application']['enabled_plugins']['text_json_diff'] == True - assert datastore.data['settings']['application']['enabled_plugins']['restock_diff'] == True - - # Third-party processors should be disabled by default - assert datastore.data['settings']['application']['enabled_plugins']['example_processor'] == False - - # Only enabled processors should be in the list - processor_names = [name for name, desc in processor_list] - assert 'text_json_diff' in processor_names - assert 'restock_diff' in processor_names - assert 'example_processor' not in processor_names - -def test_plugin_example_implementation(): - """Test the example plugin implementation""" - from ..processors.example_processor_plugin import ExampleProcessorPlugin - - plugin = ExampleProcessorPlugin() - assert plugin.get_processor_name() == "example_processor" - assert "Example Processor Plugin" in plugin.get_processor_description() - assert plugin.get_processor_version() is not None - - # Test the form class - form_class = plugin.get_form_class(processor_name="example_processor") - assert form_class is not None - assert hasattr(form_class, "example_settings") - - # Test the model class - model_class = plugin.get_watch_model_class(processor_name="example_processor") - assert model_class is not None - - # Create an instance of the model and check its methods - model_instance = model_class() - assert hasattr(model_instance, "get_example_threshold") - assert hasattr(model_instance, "is_example_mode_enabled") \ No newline at end of file From a218b10c5fa4007d02f37cbc379ce77801f35284 Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Wed, 19 Mar 2025 23:56:51 +0100 Subject: [PATCH 08/21] Remove extra form redef --- changedetectionio/blueprint/settings/__init__.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/changedetectionio/blueprint/settings/__init__.py b/changedetectionio/blueprint/settings/__init__.py index aa47fc1800e..a8f71567d22 100644 --- a/changedetectionio/blueprint/settings/__init__.py +++ b/changedetectionio/blueprint/settings/__init__.py @@ -88,16 +88,7 @@ def settings_page(): # Get processor plugins info from changedetectionio.processors import get_all_plugins_info plugins_info = get_all_plugins_info() - - # Create/update form with plugins info - default = deepcopy(datastore.data['settings']) - form = forms.globalSettingsForm( - formdata=request.form if request.method == 'POST' else None, - data=default, - extra_notification_tokens=datastore.get_unique_notification_tokens_available(), - plugins_info=plugins_info - ) - + # Process settings including plugin toggles if request.method == 'POST' and form.validate(): # Process the main form data From d6470bc963747cce0eb3c9d98e75eff05fd62003 Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Wed, 19 Mar 2025 23:59:27 +0100 Subject: [PATCH 09/21] fix test --- changedetectionio/tests/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changedetectionio/tests/test_api.py b/changedetectionio/tests/test_api.py index 097133fef57..58a520dad3f 100644 --- a/changedetectionio/tests/test_api.py +++ b/changedetectionio/tests/test_api.py @@ -377,7 +377,7 @@ def test_api_import(client, live_server, measure_memory_usage): api_key = extract_api_key_from_UI(client) res = client.post( - url_for("import") + "?tag=import-test", + url_for("imports.import_page") + "?tag=import-test", data='https://website1.com\r\nhttps://website2.com', headers={'x-api-key': api_key}, follow_redirects=True From b612e5ace0c0d8b464962c71b55d2250d6182397 Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Thu, 20 Mar 2025 00:01:44 +0100 Subject: [PATCH 10/21] Revert "fix test" This reverts commit d6470bc963747cce0eb3c9d98e75eff05fd62003. --- changedetectionio/tests/test_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changedetectionio/tests/test_api.py b/changedetectionio/tests/test_api.py index 58a520dad3f..097133fef57 100644 --- a/changedetectionio/tests/test_api.py +++ b/changedetectionio/tests/test_api.py @@ -377,7 +377,7 @@ def test_api_import(client, live_server, measure_memory_usage): api_key = extract_api_key_from_UI(client) res = client.post( - url_for("imports.import_page") + "?tag=import-test", + url_for("import") + "?tag=import-test", data='https://website1.com\r\nhttps://website2.com', headers={'x-api-key': api_key}, follow_redirects=True From 03725992d026a803e0e96aee9c9bbcb26a10e2c3 Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Thu, 20 Mar 2025 00:10:58 +0100 Subject: [PATCH 11/21] Use new pyppeteerng --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6fbd5d569fc..07864359f6d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -73,7 +73,7 @@ jq~=1.3; python_version >= "3.8" and sys_platform == "linux" # playwright is installed at Dockerfile build time because it's not available on all platforms -pyppeteer-ng==2.0.0rc5 +pyppeteer-ng==2.0.0rc6 pyppeteerstealth>=0.0.4 # Include pytest, so if theres a support issue we can ask them to run these tests on their setup From 822a985b16e500574a11c8a81775610e7e0b19df Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Thu, 20 Mar 2025 09:37:50 +0100 Subject: [PATCH 12/21] fix imports --- changedetectionio/processors/__init__.py | 18 ++++++------------ .../processors/restock_diff/__init__.py | 2 +- .../processors/restock_diff/processor.py | 3 ++- changedetectionio/store.py | 10 +++++----- .../tests/test_restock_itemprop.py | 7 +++++-- 5 files changed, 19 insertions(+), 21 deletions(-) diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py index a1d55f9ead8..f5cb215b0ff 100644 --- a/changedetectionio/processors/__init__.py +++ b/changedetectionio/processors/__init__.py @@ -4,12 +4,10 @@ from copy import deepcopy from loguru import logger import hashlib -import importlib -import inspect import os -import pkgutil import re -import sys + + from .pluggy_interface import plugin_manager, hookimpl class difference_detection_processor(): @@ -275,13 +273,14 @@ def get_watch_model_for_processor(processor_name): Get the Watch model class for the specified processor name :return: The Watch model class """ + # Try each plugin in turn for plugin in plugin_manager.get_plugins(): if hasattr(plugin, "get_watch_model_class"): model_class = plugin.get_watch_model_class(processor_name=processor_name) if model_class: return model_class - + # Default to standard Watch model from changedetectionio.model import Watch return Watch.model @@ -360,16 +359,11 @@ def get_form_class(self, processor_name): @hookimpl def get_watch_model_class(self, processor_name): if processor_name == 'restock_diff': - # Currently uses default watch model, could be customized in the future - from changedetectionio.model import Watch - return Watch.model + from . import restock_diff + return restock_diff.Watch return None -# For backward compatibility -def get_custom_watch_obj_for_processor(processor_name): - return get_watch_model_for_processor(processor_name) - # Register the built-in processor plugins plugin_manager.register(TextJsonDiffPlugin()) plugin_manager.register(RestockDiffPlugin()) diff --git a/changedetectionio/processors/restock_diff/__init__.py b/changedetectionio/processors/restock_diff/__init__.py index 3d472beece0..9bee55d78fc 100644 --- a/changedetectionio/processors/restock_diff/__init__.py +++ b/changedetectionio/processors/restock_diff/__init__.py @@ -1,5 +1,4 @@ -from babel.numbers import parse_decimal from changedetectionio.model.Watch import model as BaseWatch from typing import Union import re @@ -7,6 +6,7 @@ class Restock(dict): def parse_currency(self, raw_value: str) -> Union[float, None]: + from babel.numbers import parse_decimal # Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer. standardized_value = raw_value diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py index ac3512108ac..8242f4e8ad6 100644 --- a/changedetectionio/processors/restock_diff/processor.py +++ b/changedetectionio/processors/restock_diff/processor.py @@ -152,7 +152,8 @@ def run_changedetection(self, watch): # Unset any existing notification error update_obj = {'last_notification_error': False, 'last_error': False, 'restock': Restock()} - + if not 'restock_settings' in watch.keys(): + raise Exception("Restock settings not found in watch.") self.screenshot = self.fetcher.screenshot self.xpath_data = self.fetcher.xpath_data diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 9d5c70861ec..4e8f7e794be 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -18,8 +18,7 @@ import uuid as uuid_builder from loguru import logger -from .processors import get_custom_watch_obj_for_processor -from .processors.restock_diff import Restock +from .processors import get_watch_model_for_processor # Because the server will run as a daemon and wont know the URL for notification links when firing off a notification BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)' @@ -150,10 +149,10 @@ def rehydrate_entity(self, uuid, entity, processor_override=None): entity['uuid'] = uuid if processor_override: - watch_class = get_custom_watch_obj_for_processor(processor_override) + watch_class = get_watch_model_for_processor(processor_override) entity['processor']=processor_override else: - watch_class = get_custom_watch_obj_for_processor(entity.get('processor')) + watch_class = get_watch_model_for_processor(entity.get('processor')) if entity.get('uuid') != 'text_json_diff': logger.trace(f"Loading Watch object '{watch_class.__module__}.{watch_class.__name__}' for UUID {uuid}") @@ -345,7 +344,7 @@ def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now= apply_extras['tags'] = list(set(apply_extras.get('tags'))) # If the processor also has its own Watch implementation - watch_class = get_custom_watch_obj_for_processor(apply_extras.get('processor')) + watch_class = get_watch_model_for_processor(apply_extras.get('processor')) new_watch = watch_class(datastore_path=self.datastore_path, url=url) new_uuid = new_watch.get('uuid') @@ -890,6 +889,7 @@ def update_16(self): # Migrate old 'in_stock' values to the new Restock def update_17(self): + from .processors.restock_diff import Restock for uuid, watch in self.data['watching'].items(): if 'in_stock' in watch: watch['restock'] = Restock({'in_stock': watch.get('in_stock')}) diff --git a/changedetectionio/tests/test_restock_itemprop.py b/changedetectionio/tests/test_restock_itemprop.py index f5e852f470e..3cd0f7a72c4 100644 --- a/changedetectionio/tests/test_restock_itemprop.py +++ b/changedetectionio/tests/test_restock_itemprop.py @@ -95,12 +95,14 @@ def test_itemprop_price_change(client, live_server): test_url = url_for('test_endpoint', _external=True) set_original_response(props_markup=instock_props[0], price="190.95") - client.post( + res = client.post( url_for("ui.ui_views.form_quick_watch_add"), data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'}, follow_redirects=True ) + assert res.status_code == 200 + # A change in price, should trigger a change by default wait_for_all_checks(client) res = client.get(url_for("index")) @@ -110,6 +112,7 @@ def test_itemprop_price_change(client, live_server): set_original_response(props_markup=instock_props[0], price='180.45') client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) + res = client.get(url_for("index")) assert b'180.45' in res.data assert b'unviewed' in res.data @@ -395,7 +398,7 @@ def test_data_sanity(client, live_server): test_url = url_for('test_endpoint', _external=True) test_url2 = url_for('test_endpoint2', _external=True) set_original_response(props_markup=instock_props[0], price="950.95") - client.post( + res = client.post( url_for("ui.ui_views.form_quick_watch_add"), data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'}, follow_redirects=True From 9182918139dcd687819f64fd02d1b4fb8431e7a9 Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Fri, 21 Mar 2025 10:23:15 +0100 Subject: [PATCH 13/21] improve datastore object (better for switching model types) --- .../blueprint/price_data_follower/__init__.py | 7 ++ changedetectionio/model/Watch.py | 1 + changedetectionio/model/__init__.py | 73 +++++++++++++++++-- .../processors/restock_diff/__init__.py | 2 +- changedetectionio/store.py | 21 ++---- .../test_automatic_follow_ldjson_price.py | 7 +- 6 files changed, 87 insertions(+), 24 deletions(-) diff --git a/changedetectionio/blueprint/price_data_follower/__init__.py b/changedetectionio/blueprint/price_data_follower/__init__.py index 018d54fbad2..d98739d4942 100644 --- a/changedetectionio/blueprint/price_data_follower/__init__.py +++ b/changedetectionio/blueprint/price_data_follower/__init__.py @@ -12,10 +12,17 @@ def construct_blueprint(datastore, update_q: PriorityQueue): @login_required @price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET']) def accept(uuid): + + old_data = datastore.data['watching'][uuid].get_data() + + datastore.data['watching'][uuid] = datastore.rehydrate_entity(default_dict=old_data, processor_override='restock_diff') datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT datastore.data['watching'][uuid]['processor'] = 'restock_diff' datastore.data['watching'][uuid].clear_watch() + + # Queue the watch for updating update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) + return redirect(url_for("index")) @login_required diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index c04af711bbb..f91371c5718 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -42,6 +42,7 @@ def __init__(self, *arg, **kw): if kw.get('datastore_path'): del kw['datastore_path'] super(model, self).__init__(*arg, **kw) + if kw.get('default'): self.update(kw['default']) del kw['default'] diff --git a/changedetectionio/model/__init__.py b/changedetectionio/model/__init__.py index 49b3209c8f2..ad926c1306b 100644 --- a/changedetectionio/model/__init__.py +++ b/changedetectionio/model/__init__.py @@ -7,7 +7,8 @@ class watch_base(dict): def __init__(self, *arg, **kw): - self.update({ + # Initialize internal data storage + self.__data = { # Custom notification content # Re #110, so then if this is set to None, we know to use the default value instead # Requires setting to None on submit if it's the same as the default @@ -124,12 +125,72 @@ def __init__(self, *arg, **kw): 'remove_duplicate_lines': False, 'trigger_text': [], # List of text or regex to wait for until a change is detected 'url': '', - 'uuid': str(uuid.uuid4()), + 'uuid': None, 'webdriver_delay': None, 'webdriver_js_execute_code': None, # Run before change-detection - }) + } - super(watch_base, self).__init__(*arg, **kw) + # Initialize as empty dict but maintain dict interface + super(watch_base, self).__init__() + + # Update with provided data + if arg or kw: + self.update(*arg, **kw) - if self.get('default'): - del self['default'] \ No newline at end of file + # Generate UUID if needed + if not self.__data.get('uuid'): + self.__data['uuid'] = str(uuid.uuid4()) + + # Dictionary interface methods to use self.__data + def __getitem__(self, key): + return self.__data[key] + + def __setitem__(self, key, value): + self.__data[key] = value + + def __delitem__(self, key): + del self.__data[key] + + def __contains__(self, key): + return key in self.__data + + def __iter__(self): + return iter(self.__data) + + def __len__(self): + return len(self.__data) + + def get(self, key, default=None): + return self.__data.get(key, default) + + def update(self, *args, **kwargs): + if args: + if len(args) > 1: + raise TypeError("update expected at most 1 arguments, got %d" % len(args)) + other = dict(args[0]) + for key in other: + self.__data[key] = other[key] + for key in kwargs: + self.__data[key] = kwargs[key] + + def items(self): + return self.__data.items() + + def keys(self): + return self.__data.keys() + + def values(self): + return self.__data.values() + + def pop(self, key, default=None): + return self.__data.pop(key, default) + + def popitem(self): + return self.__data.popitem() + + def clear(self): + self.__data.clear() + + def get_data(self): + """Returns the internal data dictionary""" + return self.__data diff --git a/changedetectionio/processors/restock_diff/__init__.py b/changedetectionio/processors/restock_diff/__init__.py index 9bee55d78fc..c02813e9b3a 100644 --- a/changedetectionio/processors/restock_diff/__init__.py +++ b/changedetectionio/processors/restock_diff/__init__.py @@ -63,7 +63,7 @@ def __init__(self, *arg, **kw): self['restock_settings'] = kw['default']['restock_settings'] if kw.get('default',{}).get('restock_settings') else { 'follow_price_changes': True, 'in_stock_processing' : 'in_stock_only' - } #@todo update + } def clear_watch(self): super().clear_watch() diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 4e8f7e794be..f0fb76ed5cf 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -83,12 +83,12 @@ def __init__(self, datastore_path="/datastore", include_default_watches=True, ve # Convert each existing watch back to the Watch.model object for uuid, watch in self.__data['watching'].items(): - self.__data['watching'][uuid] = self.rehydrate_entity(uuid, watch) + self.__data['watching'][uuid] = self.rehydrate_entity(default_dict=watch) logger.info(f"Watching: {uuid} {watch['url']}") # And for Tags also, should be Restock type because it has extra settings for uuid, tag in self.__data['settings']['application']['tags'].items(): - self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(uuid, tag, processor_override='restock_diff') + self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(default_dict=tag, processor_override='restock_diff') logger.info(f"Tag: {uuid} {tag['title']}") # First time ran, Create the datastore. @@ -144,20 +144,11 @@ def __init__(self, datastore_path="/datastore", include_default_watches=True, ve # Finally start the thread that will manage periodic data saves to JSON save_data_thread = threading.Thread(target=self.save_datastore).start() - def rehydrate_entity(self, uuid, entity, processor_override=None): - """Set the dict back to the dict Watch object""" - entity['uuid'] = uuid + def rehydrate_entity(self, default_dict: dict, processor_override='text_json_diff'): - if processor_override: - watch_class = get_watch_model_for_processor(processor_override) - entity['processor']=processor_override - else: - watch_class = get_watch_model_for_processor(entity.get('processor')) - - if entity.get('uuid') != 'text_json_diff': - logger.trace(f"Loading Watch object '{watch_class.__module__}.{watch_class.__name__}' for UUID {uuid}") - - entity = watch_class(datastore_path=self.datastore_path, default=entity) + watch_class = get_watch_model_for_processor(processor_override) + default_dict['processor'] = processor_override + entity = watch_class(datastore_path=self.datastore_path, default=default_dict) return entity def set_last_viewed(self, uuid, timestamp): diff --git a/changedetectionio/tests/test_automatic_follow_ldjson_price.py b/changedetectionio/tests/test_automatic_follow_ldjson_price.py index 2e085d3f856..97054119271 100644 --- a/changedetectionio/tests/test_automatic_follow_ldjson_price.py +++ b/changedetectionio/tests/test_automatic_follow_ldjson_price.py @@ -101,7 +101,9 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage # Accept it uuid = next(iter(live_server.app.config['DATASTORE'].data['watching'])) #time.sleep(1) - client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True)) + res = client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True)) + # should now be switched to restock_mode + wait_for_all_checks(client) client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) # Offer should be gone @@ -154,6 +156,7 @@ def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_ assert b"1 Imported" in res.data wait_for_all_checks(client) + assert len(client.application.config.get('DATASTORE').data['watching']) for k,v in client.application.config.get('DATASTORE').data['watching'].items(): assert v.get('last_error') == False assert v.get('has_ldjson_price_data') == has_ldjson_price_data, f"Detected LDJSON data? should be {has_ldjson_price_data}" @@ -163,7 +166,7 @@ def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_ client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) -def test_bad_ldjson_is_correctly_ignored(client, live_server, measure_memory_usage): +def test_bad_ldjson_is_correctly_ignored(client, live_server): #live_server_setup(live_server) test_return_data = """ <html> From 35d3ebeba5d6e56b6d9fc40cdfe244bb0606a918 Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Fri, 21 Mar 2025 10:42:25 +0100 Subject: [PATCH 14/21] Adding organisational UI tags --- changedetectionio/processors/__init__.py | 10 ++++++++++ changedetectionio/processors/pluggy_interface.py | 16 ++++++++++++++++ .../processors/restock_diff/processor.py | 1 + .../processors/text_json_diff/processor.py | 1 + 4 files changed, 28 insertions(+) diff --git a/changedetectionio/processors/__init__.py b/changedetectionio/processors/__init__.py index f5cb215b0ff..e05fd221ff2 100644 --- a/changedetectionio/processors/__init__.py +++ b/changedetectionio/processors/__init__.py @@ -300,6 +300,11 @@ def get_processor_description(self): def get_processor_version(self): from changedetectionio import __version__ return __version__ + + @hookimpl + def get_processor_ui_tag(self): + from .text_json_diff.processor import UI_tag + return UI_tag @hookimpl def perform_site_check(self, datastore, watch_uuid): @@ -337,6 +342,11 @@ def get_processor_description(self): def get_processor_version(self): from changedetectionio import __version__ return __version__ + + @hookimpl + def get_processor_ui_tag(self): + from .restock_diff.processor import UI_tag + return UI_tag @hookimpl def perform_site_check(self, datastore, watch_uuid): diff --git a/changedetectionio/processors/pluggy_interface.py b/changedetectionio/processors/pluggy_interface.py index 350821d362e..b757769aeea 100644 --- a/changedetectionio/processors/pluggy_interface.py +++ b/changedetectionio/processors/pluggy_interface.py @@ -7,6 +7,7 @@ hookspec = pluggy.HookspecMarker(PLUGIN_NAMESPACE) hookimpl = pluggy.HookimplMarker(PLUGIN_NAMESPACE) +UI_tags = {} class ProcessorSpec: """Hook specifications for difference detection processors.""" @@ -26,6 +27,11 @@ def get_processor_version(): """Return the processor plugin version.""" pass + @hookspec + def get_processor_ui_tag(): + """Return the UI tag for the processor (used for categorization in UI).""" + pass + @hookspec def perform_site_check(datastore, watch_uuid): """Return the processor handler class or None if not applicable. @@ -61,9 +67,19 @@ def get_watch_model_class(processor_name): # Register hookspecs plugin_manager.add_hookspecs(ProcessorSpec) +# Initialize by loading plugins and building UI_tags dictionary try: # Discover installed plugins from external packages (if any) plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE) logger.info(f"Loaded plugins: {plugin_manager.get_plugins()}") + + # Build UI_tags dictionary from all plugins + for plugin in plugin_manager.get_plugins(): + if hasattr(plugin, "get_processor_name") and hasattr(plugin, "get_processor_ui_tag"): + plugin_name = plugin.get_processor_name() + ui_tag = plugin.get_processor_ui_tag() + if plugin_name and ui_tag: + UI_tags[plugin_name] = ui_tag + logger.info(f"Found UI tag for plugin {plugin_name}: {ui_tag}") except Exception as e: logger.critical(f"Error loading plugins: {str(e)}") \ No newline at end of file diff --git a/changedetectionio/processors/restock_diff/processor.py b/changedetectionio/processors/restock_diff/processor.py index 8242f4e8ad6..b098a6ef026 100644 --- a/changedetectionio/processors/restock_diff/processor.py +++ b/changedetectionio/processors/restock_diff/processor.py @@ -9,6 +9,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) name = 'Re-stock & Price detection for single product pages' description = 'Detects if the product goes back to in-stock' +UI_tag = "Restock" class UnableToExtractRestockData(Exception): def __init__(self, status_code): diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py index 5ccc8ce5f3f..985a7b05b55 100644 --- a/changedetectionio/processors/text_json_diff/processor.py +++ b/changedetectionio/processors/text_json_diff/processor.py @@ -17,6 +17,7 @@ name = 'Webpage Text/HTML, JSON and PDF changes' description = 'Detects all text changes where possible' +UI_tag = "Text Diff" json_filter_prefixes = ['json:', 'jq:', 'jqraw:'] From 63a8802f32e08259b4e56055fc02548bfb6dc099 Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Fri, 21 Mar 2025 11:00:35 +0100 Subject: [PATCH 15/21] Tidy up model def and clean up API endpoint --- changedetectionio/api/api_v1.py | 36 ++-- changedetectionio/model/__init__.py | 246 ++++++++++++++-------------- changedetectionio/store.py | 2 +- changedetectionio/tests/test_api.py | 5 +- 4 files changed, 146 insertions(+), 143 deletions(-) diff --git a/changedetectionio/api/api_v1.py b/changedetectionio/api/api_v1.py index fc09bb1b3a7..9e03f2e294d 100644 --- a/changedetectionio/api/api_v1.py +++ b/changedetectionio/api/api_v1.py @@ -12,11 +12,10 @@ # See docs/README.md for rebuilding the docs/apidoc information from . import api_schema -from ..model import watch_base +from ..model import schema as watch_schema # Build a JSON Schema atleast partially based on our Watch model -watch_base_config = watch_base() -schema = api_schema.build_watch_json_schema(watch_base_config) +schema = api_schema.build_watch_json_schema(watch_schema) schema_create_watch = copy.deepcopy(schema) schema_create_watch['required'] = ['url'] @@ -53,9 +52,9 @@ def get(self, uuid): @apiSuccess (200) {JSON} WatchJSON JSON Full JSON object of the watch """ from copy import deepcopy - watch = deepcopy(self.datastore.data['watching'].get(uuid)) + watch = self.datastore.data['watching'].get(uuid) if not watch: - abort(404, message='No watch exists with the UUID of {}'.format(uuid)) + abort(404, message=f'No watch exists with the UUID of {uuid}') if request.args.get('recheck'): self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) @@ -73,13 +72,16 @@ def get(self, uuid): self.datastore.data['watching'].get(uuid).unmute() return "OK", 200 - # Return without history, get that via another API call - # Properties are not returned as a JSON, so add the required props manually - watch['history_n'] = watch.history_n - # attr .last_changed will check for the last written text snapshot on change - watch['last_changed'] = watch.last_changed - watch['viewed'] = watch.viewed - return watch + + response = dict(watch.get_data()) + + # Add properties that aren't included in the standard dictionary items (they are properties/attr) + response['history_n'] = watch.history_n + response['last_changed'] = watch.last_changed + response['viewed'] = watch.viewed + response['title'] = watch.get('title') + + return response @auth.check_token def delete(self, uuid): @@ -114,16 +116,16 @@ def put(self, uuid): @apiSuccess (200) {String} OK Was updated @apiSuccess (500) {String} ERR Some other error """ - watch = self.datastore.data['watching'].get(uuid) - if not watch: - abort(404, message='No watch exists with the UUID of {}'.format(uuid)) + + if not self.datastore.data['watching'].get(uuid): + abort(404, message=f'No watch exists with the UUID of {uuid}') if request.json.get('proxy'): plist = self.datastore.proxy_list if not request.json.get('proxy') in plist: - return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400 + return f"Invalid proxy choice, currently supported proxies are '{', '.join(plist)}'", 400 - watch.update(request.json) + self.datastore.data['watching'][uuid].update(request.json) return "OK", 200 diff --git a/changedetectionio/model/__init__.py b/changedetectionio/model/__init__.py index ad926c1306b..fc7c579515d 100644 --- a/changedetectionio/model/__init__.py +++ b/changedetectionio/model/__init__.py @@ -1,134 +1,138 @@ import os import uuid +from copy import deepcopy from changedetectionio import strtobool from changedetectionio.notification import default_notification_format_for_watch +schema = { + # Custom notification content + # Re #110, so then if this is set to None, we know to use the default value instead + # Requires setting to None on submit if it's the same as the default + # Should be all None by default, so we use the system default in this case. + 'body': None, + 'browser_steps': [], + 'browser_steps_last_error_step': None, + 'check_count': 0, + 'check_unique_lines': False, # On change-detected, compare against all history if its something new + 'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine. + 'content-type': None, + 'date_created': None, + 'extract_text': [], # Extract text by regex after filters + 'extract_title_as_title': False, + 'fetch_backend': 'system', # plaintext, playwright etc + 'fetch_time': 0.0, + 'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), + 'filter_text_added': True, + 'filter_text_removed': True, + 'filter_text_replaced': True, + 'follow_price_changes': True, + 'has_ldjson_price_data': None, + 'headers': {}, # Extra headers to send + 'ignore_text': [], # List of text to ignore when calculating the comparison checksum + 'in_stock_only': True, # Only trigger change on going to instock from out-of-stock + 'include_filters': [], + 'last_checked': 0, + 'last_error': False, + 'last_viewed': 0, # history key value of the last viewed via the [diff] link + 'method': 'GET', + 'notification_alert_count': 0, + 'notification_body': None, + 'notification_format': default_notification_format_for_watch, + 'notification_muted': False, + 'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL + 'notification_title': None, + 'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise) + 'paused': False, + 'previous_md5': False, + 'previous_md5_before_filters': False, # Used for skipping changedetection entirely + 'processor': 'text_json_diff', # could be restock_diff or others from .processors + 'price_change_threshold_percent': None, + 'proxy': None, # Preferred proxy connection + 'remote_server_reply': None, # From 'server' reply header + 'sort_text_alphabetically': False, + 'subtractive_selectors': [], + 'tag': '', # Old system of text name for a tag, to be removed + 'tags': [], # list of UUIDs to App.Tags + 'text_should_not_be_present': [], # Text that should not present + 'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None}, + 'time_between_check_use_default': True, + "time_schedule_limit": { + "enabled": False, + "monday": { + "enabled": True, + "start_time": "00:00", + "duration": { + "hours": "24", + "minutes": "00" + } + }, + "tuesday": { + "enabled": True, + "start_time": "00:00", + "duration": { + "hours": "24", + "minutes": "00" + } + }, + "wednesday": { + "enabled": True, + "start_time": "00:00", + "duration": { + "hours": "24", + "minutes": "00" + } + }, + "thursday": { + "enabled": True, + "start_time": "00:00", + "duration": { + "hours": "24", + "minutes": "00" + } + }, + "friday": { + "enabled": True, + "start_time": "00:00", + "duration": { + "hours": "24", + "minutes": "00" + } + }, + "saturday": { + "enabled": True, + "start_time": "00:00", + "duration": { + "hours": "24", + "minutes": "00" + } + }, + "sunday": { + "enabled": True, + "start_time": "00:00", + "duration": { + "hours": "24", + "minutes": "00" + } + }, + }, + 'title': None, + 'track_ldjson_price_data': None, + 'trim_text_whitespace': False, + 'remove_duplicate_lines': False, + 'trigger_text': [], # List of text or regex to wait for until a change is detected + 'url': '', + 'uuid': None, + 'webdriver_delay': None, + 'webdriver_js_execute_code': None, # Run before change-detection +} + class watch_base(dict): def __init__(self, *arg, **kw): # Initialize internal data storage - self.__data = { - # Custom notification content - # Re #110, so then if this is set to None, we know to use the default value instead - # Requires setting to None on submit if it's the same as the default - # Should be all None by default, so we use the system default in this case. - 'body': None, - 'browser_steps': [], - 'browser_steps_last_error_step': None, - 'check_count': 0, - 'check_unique_lines': False, # On change-detected, compare against all history if its something new - 'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine. - 'content-type': None, - 'date_created': None, - 'extract_text': [], # Extract text by regex after filters - 'extract_title_as_title': False, - 'fetch_backend': 'system', # plaintext, playwright etc - 'fetch_time': 0.0, - 'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), - 'filter_text_added': True, - 'filter_text_removed': True, - 'filter_text_replaced': True, - 'follow_price_changes': True, - 'has_ldjson_price_data': None, - 'headers': {}, # Extra headers to send - 'ignore_text': [], # List of text to ignore when calculating the comparison checksum - 'in_stock_only': True, # Only trigger change on going to instock from out-of-stock - 'include_filters': [], - 'last_checked': 0, - 'last_error': False, - 'last_viewed': 0, # history key value of the last viewed via the [diff] link - 'method': 'GET', - 'notification_alert_count': 0, - 'notification_body': None, - 'notification_format': default_notification_format_for_watch, - 'notification_muted': False, - 'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL - 'notification_title': None, - 'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise) - 'paused': False, - 'previous_md5': False, - 'previous_md5_before_filters': False, # Used for skipping changedetection entirely - 'processor': 'text_json_diff', # could be restock_diff or others from .processors - 'price_change_threshold_percent': None, - 'proxy': None, # Preferred proxy connection - 'remote_server_reply': None, # From 'server' reply header - 'sort_text_alphabetically': False, - 'subtractive_selectors': [], - 'tag': '', # Old system of text name for a tag, to be removed - 'tags': [], # list of UUIDs to App.Tags - 'text_should_not_be_present': [], # Text that should not present - 'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None}, - 'time_between_check_use_default': True, - "time_schedule_limit": { - "enabled": False, - "monday": { - "enabled": True, - "start_time": "00:00", - "duration": { - "hours": "24", - "minutes": "00" - } - }, - "tuesday": { - "enabled": True, - "start_time": "00:00", - "duration": { - "hours": "24", - "minutes": "00" - } - }, - "wednesday": { - "enabled": True, - "start_time": "00:00", - "duration": { - "hours": "24", - "minutes": "00" - } - }, - "thursday": { - "enabled": True, - "start_time": "00:00", - "duration": { - "hours": "24", - "minutes": "00" - } - }, - "friday": { - "enabled": True, - "start_time": "00:00", - "duration": { - "hours": "24", - "minutes": "00" - } - }, - "saturday": { - "enabled": True, - "start_time": "00:00", - "duration": { - "hours": "24", - "minutes": "00" - } - }, - "sunday": { - "enabled": True, - "start_time": "00:00", - "duration": { - "hours": "24", - "minutes": "00" - } - }, - }, - 'title': None, - 'track_ldjson_price_data': None, - 'trim_text_whitespace': False, - 'remove_duplicate_lines': False, - 'trigger_text': [], # List of text or regex to wait for until a change is detected - 'url': '', - 'uuid': None, - 'webdriver_delay': None, - 'webdriver_js_execute_code': None, # Run before change-detection - } + + self.__data = deepcopy(schema) # Initialize as empty dict but maintain dict interface super(watch_base, self).__init__() diff --git a/changedetectionio/store.py b/changedetectionio/store.py index f0fb76ed5cf..86fb31bdaeb 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -410,7 +410,7 @@ def save_datastore(self): logger.remove() logger.add(sys.stderr) - logger.critical("Shutting down datastore thread") + logger.info("Shutting down datastore thread") return if self.needs_write or self.needs_write_urgent: diff --git a/changedetectionio/tests/test_api.py b/changedetectionio/tests/test_api.py index 097133fef57..584ca7e71bb 100644 --- a/changedetectionio/tests/test_api.py +++ b/changedetectionio/tests/test_api.py @@ -57,8 +57,7 @@ def test_setup(client, live_server, measure_memory_usage): def test_api_simple(client, live_server, measure_memory_usage): -# live_server_setup(live_server) - + #live_server_setup(live_server) api_key = extract_api_key_from_UI(client) # Create a watch @@ -291,7 +290,6 @@ def test_access_denied(client, live_server, measure_memory_usage): assert b"Settings updated." in res.data def test_api_watch_PUT_update(client, live_server, measure_memory_usage): - #live_server_setup(live_server) api_key = extract_api_key_from_UI(client) @@ -373,7 +371,6 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage): def test_api_import(client, live_server, measure_memory_usage): - #live_server_setup(live_server) api_key = extract_api_key_from_UI(client) res = client.post( From d01032b639eb0e75ed3fdd987704d2f8581505f3 Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Fri, 21 Mar 2025 11:21:06 +0100 Subject: [PATCH 16/21] Fix rehydratw --- changedetectionio/store.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 86fb31bdaeb..30e0288a0e9 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -87,6 +87,7 @@ def __init__(self, datastore_path="/datastore", include_default_watches=True, ve logger.info(f"Watching: {uuid} {watch['url']}") # And for Tags also, should be Restock type because it has extra settings + # @todo make this smarter! for uuid, tag in self.__data['settings']['application']['tags'].items(): self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(default_dict=tag, processor_override='restock_diff') logger.info(f"Tag: {uuid} {tag['title']}") @@ -144,7 +145,10 @@ def __init__(self, datastore_path="/datastore", include_default_watches=True, ve # Finally start the thread that will manage periodic data saves to JSON save_data_thread = threading.Thread(target=self.save_datastore).start() - def rehydrate_entity(self, default_dict: dict, processor_override='text_json_diff'): + def rehydrate_entity(self, default_dict: dict, processor_override=None): + + if not processor_override and default_dict.get('processor'): + processor_override = default_dict.get('processor') watch_class = get_watch_model_for_processor(processor_override) default_dict['processor'] = processor_override From b6195cf5af19da054dc3534e301c10bfad23acf9 Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Fri, 21 Mar 2025 12:39:11 +0100 Subject: [PATCH 17/21] always set default processor --- changedetectionio/store.py | 3 +- .../tests/test_restock_save_load_settings.py | 88 +++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 changedetectionio/tests/test_restock_save_load_settings.py diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 30e0288a0e9..941b487e018 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -149,7 +149,8 @@ def rehydrate_entity(self, default_dict: dict, processor_override=None): if not processor_override and default_dict.get('processor'): processor_override = default_dict.get('processor') - + if not processor_override: + processor_override = 'text_json_diff' watch_class = get_watch_model_for_processor(processor_override) default_dict['processor'] = processor_override entity = watch_class(datastore_path=self.datastore_path, default=default_dict) diff --git a/changedetectionio/tests/test_restock_save_load_settings.py b/changedetectionio/tests/test_restock_save_load_settings.py new file mode 100644 index 00000000000..de56f38b090 --- /dev/null +++ b/changedetectionio/tests/test_restock_save_load_settings.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +import os +import time +from flask import url_for +from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client + +def test_restock_settings_persistence(client, live_server): + """Test that restock processor and settings are correctly saved and loaded after app restart""" + + live_server_setup(live_server) + + # Create a test page with pricing information + test_return_data = """<html> + <body> + Some initial text<br> + <p>Which is across multiple lines</p> + <br> + So let's see what happens. <br> + <div>price: $10.99</div> + <div id="sametext">Out of stock</div> + </body> + </html> + """ + + with open("test-datastore/endpoint-content.txt", "w") as f: + f.write(test_return_data) + + # Add our URL to the import page (pointing to our test endpoint) + test_url = url_for('test_endpoint', _external=True) + + # Add a new watch with the restock_diff processor + res = client.post( + url_for("ui.ui_views.form_quick_watch_add"), + data={"url": test_url, "tags": '', 'processor': 'restock_diff'}, + follow_redirects=True + ) + + # Wait for initial check to complete + wait_for_all_checks(client) + + # Get the UUID of the watch + uuid = extract_UUID_from_client(client) + + # Set custom restock settings + res = client.post( + url_for("ui.ui_edit.edit_page", uuid=uuid), + data={ + "url": test_url, + "tags": "", + "headers": "", + "restock_settings-price_change_min": 10, + "restock_settings-price_change_threshold_percent": 5, + 'fetch_backend': "html_requests" + }, + follow_redirects=True + ) + + assert b"Updated watch." in res.data + + # Verify the settings were saved in the current datastore + app_config = client.application.config.get('DATASTORE').data + watch = app_config['watching'][uuid] + + assert watch.get('processor') == 'restock_diff' + assert watch['restock_settings'].get('price_change_min') == 10 + assert watch['restock_settings'].get('price_change_threshold_percent') == 5 + + # Restart the application by calling teardown and recreating the datastore + # This simulates shutting down and restarting the app + datastore = client.application.config.get('DATASTORE') + datastore.stop_thread = True + datastore.sync_to_json() # Force write to disk before recreating + + # Create a new datastore instance that will read from the saved JSON + from changedetectionio import store + new_datastore = store.ChangeDetectionStore(datastore_path="./test-datastore", include_default_watches=False) + client.application.config['DATASTORE'] = new_datastore + + # Verify the watch settings were correctly loaded after restart + app_config = client.application.config.get('DATASTORE').data + watch = app_config['watching'][uuid] + + # Check that processor mode is correctly preserved + assert watch.get('processor') == 'restock_diff', "Watch processor mode should be preserved as 'restock_diff'" + + # Check that the restock settings were correctly preserved + assert watch['restock_settings'].get('price_change_min') == 10, "price_change_min setting should be preserved" + assert watch['restock_settings'].get('price_change_threshold_percent') == 5, "price_change_threshold_percent setting should be preserved" \ No newline at end of file From 6c39c868f20dbdeb731c91f68fd8e13dec262206 Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Fri, 21 Mar 2025 13:04:20 +0100 Subject: [PATCH 18/21] New deep merge store method --- changedetectionio/model/__init__.py | 3 +- changedetectionio/store.py | 32 ++++++++++++---------- changedetectionio/tests/test_conditions.py | 6 ++-- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/changedetectionio/model/__init__.py b/changedetectionio/model/__init__.py index fc7c579515d..cfc06878446 100644 --- a/changedetectionio/model/__init__.py +++ b/changedetectionio/model/__init__.py @@ -128,6 +128,7 @@ } class watch_base(dict): + __data = {} def __init__(self, *arg, **kw): # Initialize internal data storage @@ -163,7 +164,7 @@ def __iter__(self): def __len__(self): return len(self.__data) - + def get(self, key, default=None): return self.__data.get(key, default) diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 941b487e018..3a2b1f29293 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -17,6 +17,7 @@ import time import uuid as uuid_builder from loguru import logger +from deepmerge import always_merger from .processors import get_watch_model_for_processor @@ -48,9 +49,6 @@ def __init__(self, datastore_path="/datastore", include_default_watches=True, ve self.needs_write = False self.start_time = time.time() self.stop_thread = False - # Base definition for all watchers - # deepcopy part of #569 - not sure why its needed exactly - self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={})) if path.isfile('changedetectionio/source.txt'): with open('changedetectionio/source.txt') as f: @@ -166,21 +164,27 @@ def remove_password(self): self.needs_write = True def update_watch(self, uuid, update_obj): - + """ + Update a watch with new values using the deepmerge library. + """ # It's possible that the watch could be deleted before update - if not self.__data['watching'].get(uuid): + if not uuid in self.data['watching'].keys() or update_obj is None: return with self.lock: - - # In python 3.9 we have the |= dict operator, but that still will lose data on nested structures... - for dict_key, d in self.generic_definition.items(): - if isinstance(d, dict): - if update_obj is not None and dict_key in update_obj: - self.__data['watching'][uuid][dict_key].update(update_obj[dict_key]) - del (update_obj[dict_key]) - - self.__data['watching'][uuid].update(update_obj) + # Make sure we're working with a proper Watch object + watch = self.data['watching'].get(uuid) + + # Handle None values - they mean "delete this key" + keys_to_remove = [k for k, v in update_obj.items() if v is None] + for k in keys_to_remove: + if k in watch: + del watch[k] + del update_obj[k] + + # Deep merge with the rest + always_merger.merge(watch, update_obj) + self.needs_write = True @property diff --git a/changedetectionio/tests/test_conditions.py b/changedetectionio/tests/test_conditions.py index 25f3a276978..2f3637fe44e 100644 --- a/changedetectionio/tests/test_conditions.py +++ b/changedetectionio/tests/test_conditions.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 import json -import urllib from flask import url_for from .util import live_server_setup, wait_for_all_checks @@ -44,12 +43,14 @@ def set_number_out_of_range_response(number="150"): with open("test-datastore/endpoint-content.txt", "w") as f: f.write(test_return_data) +def test_setup(live_server): + live_server_setup(live_server) def test_conditions_with_text_and_number(client, live_server): """Test that both text and number conditions work together with AND logic.""" set_original_response("50") - live_server_setup(live_server) +# live_server_setup(live_server) test_url = url_for('test_endpoint', _external=True) @@ -138,6 +139,7 @@ def test_conditions_with_text_and_number(client, live_server): def test_condition_validate_rule_row(client, live_server): set_original_response("50") + #live_server_setup(live_server) test_url = url_for('test_endpoint', _external=True) From efacc1cb6bf6569d689a7752efbd65de2205789c Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Fri, 21 Mar 2025 13:10:44 +0100 Subject: [PATCH 19/21] use deepmerge --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 07864359f6d..2c601473179 100644 --- a/requirements.txt +++ b/requirements.txt @@ -112,3 +112,4 @@ pluggy ~= 1.5 +deepmerge From 8e833a2d710047611794eb78fb93b183f049cded Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Fri, 21 Mar 2025 13:16:03 +0100 Subject: [PATCH 20/21] Store 'last_modified' time info --- changedetectionio/model/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/changedetectionio/model/__init__.py b/changedetectionio/model/__init__.py index cfc06878446..6684c4b0c06 100644 --- a/changedetectionio/model/__init__.py +++ b/changedetectionio/model/__init__.py @@ -1,6 +1,7 @@ import os import uuid from copy import deepcopy +import time from changedetectionio import strtobool from changedetectionio.notification import default_notification_format_for_watch @@ -34,6 +35,7 @@ 'include_filters': [], 'last_checked': 0, 'last_error': False, + 'last_modified': None, 'last_viewed': 0, # history key value of the last viewed via the [diff] link 'method': 'GET', 'notification_alert_count': 0, @@ -152,6 +154,7 @@ def __getitem__(self, key): def __setitem__(self, key, value): self.__data[key] = value + self.__data['last_modified'] = time.time() def __delitem__(self, key): del self.__data[key] @@ -178,6 +181,8 @@ def update(self, *args, **kwargs): for key in kwargs: self.__data[key] = kwargs[key] + self.__data['last_modified'] = time.time() + def items(self): return self.__data.items() @@ -195,6 +200,7 @@ def popitem(self): def clear(self): self.__data.clear() + self.__data['last_modified'] = time.time() def get_data(self): """Returns the internal data dictionary""" From 6d5970e55ab03e3867cc1e8705918d72b1c23cbb Mon Sep 17 00:00:00 2001 From: dgtlmoon <dgtlmoon@gmail.com> Date: Sat, 22 Mar 2025 22:13:25 +0100 Subject: [PATCH 21/21] WIP --- changedetectionio/__init__.py | 1 - changedetectionio/api/api_v1.py | 1 + .../blueprint/backups/__init__.py | 2 - .../blueprint/settings/__init__.py | 8 +- changedetectionio/blueprint/tags/__init__.py | 4 +- changedetectionio/blueprint/ui/__init__.py | 8 + changedetectionio/blueprint/ui/edit.py | 22 +- changedetectionio/flask_app.py | 1 - changedetectionio/model/Tag.py | 45 +++- changedetectionio/model/Watch.py | 13 +- changedetectionio/model/__init__.py | 45 +++- .../processors/restock_diff/__init__.py | 5 + changedetectionio/store.py | 235 ++++++++---------- changedetectionio/tests/test_conditions.py | 2 +- .../tests/test_restock_save_load_settings.py | 6 +- changedetectionio/update_worker.py | 2 +- 16 files changed, 233 insertions(+), 167 deletions(-) diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index f1b0daec666..54b612a44d7 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -33,7 +33,6 @@ def sigshutdown_handler(_signo, _stack_frame): global datastore name = signal.Signals(_signo).name logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Saving DB to disk and calling shutdown') - datastore.sync_to_json() logger.success('Sync JSON to disk complete.') # This will throw a SystemExit exception, because eventlet.wsgi.server doesn't know how to deal with it. # Solution: move to gevent or other server in the future (#2014) diff --git a/changedetectionio/api/api_v1.py b/changedetectionio/api/api_v1.py index 9e03f2e294d..e74cc1d500c 100644 --- a/changedetectionio/api/api_v1.py +++ b/changedetectionio/api/api_v1.py @@ -126,6 +126,7 @@ def put(self, uuid): return f"Invalid proxy choice, currently supported proxies are '{', '.join(plist)}'", 400 self.datastore.data['watching'][uuid].update(request.json) + self.datastore.data['watching'][uuid].save_data() return "OK", 200 diff --git a/changedetectionio/blueprint/backups/__init__.py b/changedetectionio/blueprint/backups/__init__.py index add44308c3e..e3710e02c96 100644 --- a/changedetectionio/blueprint/backups/__init__.py +++ b/changedetectionio/blueprint/backups/__init__.py @@ -89,8 +89,6 @@ def request_backup(): flash("Maximum number of backups reached, please remove some", "error") return redirect(url_for('backups.index')) - # Be sure we're written fresh - datastore.sync_to_json() zip_thread = threading.Thread(target=create_backup, args=(datastore.datastore_path, datastore.data.get("watching"))) zip_thread.start() backup_threads.append(zip_thread) diff --git a/changedetectionio/blueprint/settings/__init__.py b/changedetectionio/blueprint/settings/__init__.py index a8f71567d22..360c48fccba 100644 --- a/changedetectionio/blueprint/settings/__init__.py +++ b/changedetectionio/blueprint/settings/__init__.py @@ -71,12 +71,12 @@ def settings_page(): if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password): datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password - datastore.needs_write_urgent = True + datastore.save_settings() flash("Password protection enabled.", 'notice') flask_login.logout_user() return redirect(url_for('index')) - datastore.needs_write_urgent = True + datastore.save_settings() flash("Settings updated.") else: @@ -100,8 +100,7 @@ def settings_page(): datastore.data['settings']['application'].update(app_update) datastore.data['settings']['requests'].update(form.data['requests']) - - datastore.needs_write_urgent = True + datastore.save_settings() flash("Settings updated.") output = render_template("settings.html", @@ -125,7 +124,6 @@ def settings_page(): def settings_reset_api_key(): secret = secrets.token_hex(16) datastore.data['settings']['application']['api_access_token'] = secret - datastore.needs_write_urgent = True flash("API Key was regenerated.") return redirect(url_for('settings.settings_page')+'#api') diff --git a/changedetectionio/blueprint/tags/__init__.py b/changedetectionio/blueprint/tags/__init__.py index d7086213c8e..f580503a7eb 100644 --- a/changedetectionio/blueprint/tags/__init__.py +++ b/changedetectionio/blueprint/tags/__init__.py @@ -56,6 +56,7 @@ def form_tag_add(): def mute(uuid): if datastore.data['settings']['application']['tags'].get(uuid): datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = not datastore.data['settings']['application']['tags'][uuid]['notification_muted'] + datastore.data['settings']['application']['tags'][uuid].save_data() return redirect(url_for('tags.tags_overview_page')) @tags_blueprint.route("/delete/<string:uuid>", methods=['GET']) @@ -176,7 +177,8 @@ def form_tag_edit_submit(uuid): datastore.data['settings']['application']['tags'][uuid].update(form.data) datastore.data['settings']['application']['tags'][uuid]['processor'] = 'restock_diff' - datastore.needs_write_urgent = True + datastore.data['settings']['application']['tags'][uuid].save_data() + flash("Updated") return redirect(url_for('tags.tags_overview_page')) diff --git a/changedetectionio/blueprint/ui/__init__.py b/changedetectionio/blueprint/ui/__init__.py index 16f07f379ca..c320702a5fa 100644 --- a/changedetectionio/blueprint/ui/__init__.py +++ b/changedetectionio/blueprint/ui/__init__.py @@ -163,6 +163,7 @@ def form_watch_list_checkbox_operations(): uuid = uuid.strip() if datastore.data['watching'].get(uuid): datastore.data['watching'][uuid.strip()]['paused'] = True + datastore.data['watching'][uuid.strip()].save_data() flash("{} watches paused".format(len(uuids))) elif (op == 'unpause'): @@ -170,6 +171,7 @@ def form_watch_list_checkbox_operations(): uuid = uuid.strip() if datastore.data['watching'].get(uuid): datastore.data['watching'][uuid.strip()]['paused'] = False + datastore.data['watching'][uuid.strip()].save_data() flash("{} watches unpaused".format(len(uuids))) elif (op == 'mark-viewed'): @@ -184,6 +186,7 @@ def form_watch_list_checkbox_operations(): uuid = uuid.strip() if datastore.data['watching'].get(uuid): datastore.data['watching'][uuid.strip()]['notification_muted'] = True + datastore.data['watching'][uuid.strip()].save_data() flash("{} watches muted".format(len(uuids))) elif (op == 'unmute'): @@ -191,6 +194,7 @@ def form_watch_list_checkbox_operations(): uuid = uuid.strip() if datastore.data['watching'].get(uuid): datastore.data['watching'][uuid.strip()]['notification_muted'] = False + datastore.data['watching'][uuid.strip()].save_data() flash("{} watches un-muted".format(len(uuids))) elif (op == 'recheck'): @@ -206,6 +210,7 @@ def form_watch_list_checkbox_operations(): uuid = uuid.strip() if datastore.data['watching'].get(uuid): datastore.data['watching'][uuid]["last_error"] = False + datastore.data['watching'][uuid].save_data() flash(f"{len(uuids)} watches errors cleared") elif (op == 'clear-history'): @@ -244,6 +249,9 @@ def form_watch_list_checkbox_operations(): flash(f"{len(uuids)} watches were tagged") + for uuid in uuids: + datastore.data['watching'][uuid.strip()].save_data() + return redirect(url_for('index')) diff --git a/changedetectionio/blueprint/ui/edit.py b/changedetectionio/blueprint/ui/edit.py index 3a9d4532639..487084043f5 100644 --- a/changedetectionio/blueprint/ui/edit.py +++ b/changedetectionio/blueprint/ui/edit.py @@ -49,8 +49,8 @@ def edit_page(uuid): datastore.clear_watch_history(uuid) redirect(url_for('ui_edit.edit_page', uuid=uuid)) - # be sure we update with a copy instead of accidently editing the live object by reference - default = deepcopy(datastore.data['watching'][uuid]) + + default = datastore.data['watching'][uuid] # Defaults for proxy choice if datastore.proxy_list is not None: # When enabled @@ -114,10 +114,7 @@ def edit_page(uuid): extra_update_obj['paused'] = False extra_update_obj['time_between_check'] = form.time_between_check.data - - # Ignore text - form_ignore_text = form.ignore_text.data - datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text + extra_update_obj['ignore_text'] = form.ignore_text.data # Be sure proxy value is None if datastore.proxy_list is not None and form.data['proxy'] == '': @@ -143,23 +140,23 @@ def edit_page(uuid): tag_uuids.append(datastore.add_tag(name=t)) extra_update_obj['tags'] = tag_uuids - datastore.data['watching'][uuid].update(form.data) - datastore.data['watching'][uuid].update(extra_update_obj) + + if not datastore.data['watching'][uuid].get('tags'): # Force it to be a list, because form.data['tags'] will be string if nothing found # And del(form.data['tags'] ) wont work either for some reason datastore.data['watching'][uuid]['tags'] = [] + datastore.update_watch(uuid=uuid, update_obj=form.data | extra_update_obj) + # Recast it if need be to right data Watch handler - processor_name = form.data.get('processor') + processor_name = datastore.data['watching'][uuid].get('processor') watch_class = processors.get_watch_model_for_processor(processor_name) datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, default=datastore.data['watching'][uuid]) + datastore.data['watching'][uuid].save_data() flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.") - # Re #286 - We wait for syncing new data to disk in another thread every 60 seconds - # But in the case something is added we should save straight away - datastore.needs_write_urgent = True # Do not queue on edit if its not within the time range @@ -186,6 +183,7 @@ def edit_page(uuid): f"{uuid} - Recheck scheduler, error handling timezone, check skipped - TZ name '{tz_name}' - {str(e)}") return False + ############################# if not datastore.data['watching'][uuid].get('paused') and is_in_schedule: # Queue the watch for immediate recheck, with a higher priority diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index f4d9f0dff1b..6ba1461db23 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -370,7 +370,6 @@ def index(): elif op == 'mute': datastore.data['watching'][uuid].toggle_mute() - datastore.needs_write = True return redirect(url_for('index', tag = active_tag_uuid)) # Sort by last_changed and add the uuid which is usually the key.. diff --git a/changedetectionio/model/Tag.py b/changedetectionio/model/Tag.py index 6dca480cdde..7eb09b8214c 100644 --- a/changedetectionio/model/Tag.py +++ b/changedetectionio/model/Tag.py @@ -1,14 +1,57 @@ -from changedetectionio.model import watch_base +import os +import json +import uuid as uuid_builder +import time +from copy import deepcopy +from loguru import logger + +from changedetectionio.model import watch_base, schema class model(watch_base): + """Tag model that writes to tags/{uuid}/tag.json instead of the main watch directory""" + __datastore_path = None def __init__(self, *arg, **kw): super(model, self).__init__(*arg, **kw) + self.__datastore_path = kw.get("datastore_path") self['overrides_watch'] = kw.get('default', {}).get('overrides_watch') if kw.get('default'): self.update(kw['default']) del kw['default'] + + @property + def watch_data_dir(self): + # Override to use tags directory instead of the normal watch data directory + datastore_path = getattr(self, '_model__datastore_path', None) + if datastore_path: + tags_path = os.path.join(datastore_path, 'tags') + # Make sure the tags directory exists + if not os.path.exists(tags_path): + os.makedirs(tags_path) + return os.path.join(tags_path, self['uuid']) + return None + + def save_data(self): + """Override to save tag to tags/{uuid}/tag.json""" + logger.debug(f"Saving tag {self['uuid']}") + + if not self.get('uuid'): + # Might have been called when creating the tag + return + + tags_path = os.path.join(self.__datastore_path, 'tags') + if not os.path.isdir(tags_path): + os.mkdir(os.path.join(tags_path)) + + path = os.path.join(tags_path, self.get('uuid')+".json") + try: + with open(path + ".tmp", 'w') as json_file: + json.dump(self.get_data(), json_file, indent=4) + os.replace(path + ".tmp", path) + except Exception as e: + logger.error(f"Error writing JSON for tag {self.get('uuid')}!! (JSON file save was skipped) : {str(e)}") + diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index f91371c5718..f5b1173bcbe 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -38,18 +38,13 @@ class model(watch_base): jitter_seconds = 0 def __init__(self, *arg, **kw): - self.__datastore_path = kw.get('datastore_path') - if kw.get('datastore_path'): - del kw['datastore_path'] + super(model, self).__init__(*arg, **kw) if kw.get('default'): self.update(kw['default']) del kw['default'] - if self.get('default'): - del self['default'] - # Be sure the cached timestamp is ready bump = self.history @@ -301,6 +296,7 @@ def get_history_snapshot(self, timestamp): # result_obj from fetch_site_status.run() def save_history_text(self, contents, timestamp, snapshot_id): import brotli + import tempfile logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}") @@ -417,11 +413,6 @@ def snapshot_screenshot_ctime(self): def snapshot_error_screenshot_ctime(self): return self.__get_file_ctime('last-error-screenshot.png') - @property - def watch_data_dir(self): - # The base dir of the watch data - return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None - def get_error_text(self): """Return the text saved from a previous request that resulted in a non-200 error""" fname = os.path.join(self.watch_data_dir, "last-error.txt") diff --git a/changedetectionio/model/__init__.py b/changedetectionio/model/__init__.py index 6684c4b0c06..ddcba242434 100644 --- a/changedetectionio/model/__init__.py +++ b/changedetectionio/model/__init__.py @@ -1,7 +1,9 @@ import os import uuid from copy import deepcopy +from loguru import logger import time +import json from changedetectionio import strtobool from changedetectionio.notification import default_notification_format_for_watch @@ -49,6 +51,7 @@ 'previous_md5': False, 'previous_md5_before_filters': False, # Used for skipping changedetection entirely 'processor': 'text_json_diff', # could be restock_diff or others from .processors + 'processor_state': {}, # Extra configs for custom processors/plugins, keyed by processor name 'price_change_threshold_percent': None, 'proxy': None, # Preferred proxy connection 'remote_server_reply': None, # From 'server' reply header @@ -131,12 +134,14 @@ class watch_base(dict): __data = {} + __datastore_path = None + __save_enabled = True def __init__(self, *arg, **kw): # Initialize internal data storage self.__data = deepcopy(schema) - + self.__datastore_path = kw.pop('datastore_path', None) # Initialize as empty dict but maintain dict interface super(watch_base, self).__init__() @@ -147,7 +152,18 @@ def __init__(self, *arg, **kw): # Generate UUID if needed if not self.__data.get('uuid'): self.__data['uuid'] = str(uuid.uuid4()) - + + if self.__data.get('default'): + del(self.__data['default']) + + @property + def watch_data_dir(self): + # The base dir of the watch data + return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None + + def enable_saving(self): + self.__save_enabled = True + # Dictionary interface methods to use self.__data def __getitem__(self, key): return self.__data[key] @@ -155,7 +171,7 @@ def __getitem__(self, key): def __setitem__(self, key, value): self.__data[key] = value self.__data['last_modified'] = time.time() - + def __delitem__(self, key): del self.__data[key] @@ -205,3 +221,26 @@ def clear(self): def get_data(self): """Returns the internal data dictionary""" return self.__data + + def save_data(self): + if self.__save_enabled: + if not self.__data.get('uuid'): + # Might have been called when creating the watch + return + + logger.debug(f"Saving watch {self['uuid']}") + path = os.path.join(self.__datastore_path, self.get('uuid')) + filepath = os.path.join(str(path), "watch.json") + if not os.path.exists(path): + os.mkdir(path) + + try: + import tempfile + with tempfile.NamedTemporaryFile(mode='wb+', delete=False) as tmp: + tmp.write(json.dumps(self.get_data(), indent=2).encode('utf-8')) + tmp.flush() + os.replace(tmp.name, filepath) + + + except Exception as e: + logger.error(f"Error writing JSON for {self.get('uuid')}!! (JSON file save was skipped) : {str(e)}") diff --git a/changedetectionio/processors/restock_diff/__init__.py b/changedetectionio/processors/restock_diff/__init__.py index c02813e9b3a..2229a5b6c75 100644 --- a/changedetectionio/processors/restock_diff/__init__.py +++ b/changedetectionio/processors/restock_diff/__init__.py @@ -56,8 +56,13 @@ def __setitem__(self, key, value): super().__setitem__(key, value) class Watch(BaseWatch): + + def load_extra_vars(self): + # something from disk? + def __init__(self, *arg, **kw): super().__init__(*arg, **kw) + # Restock Obj helps with the state of the situation self['restock'] = Restock(kw['default']['restock']) if kw.get('default') and kw['default'].get('restock') else Restock() self['restock_settings'] = kw['default']['restock_settings'] if kw.get('default',{}).get('restock_settings') else { diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 3a2b1f29293..0c9ec5ca114 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -31,11 +31,6 @@ # https://stackoverflow.com/questions/6190468/how-to-trigger-function-on-value-change class ChangeDetectionStore: lock = Lock() - # For general updates/writes that can wait a few seconds - needs_write = False - - # For when we edit, we should write to disk - needs_write_urgent = False __version_check = True @@ -46,7 +41,7 @@ def __init__(self, datastore_path="/datastore", include_default_watches=True, ve self.datastore_path = datastore_path self.json_store_path = "{}/url-watches.json".format(self.datastore_path) logger.info(f"Datastore path is '{self.json_store_path}'") - self.needs_write = False + self.start_time = time.time() self.stop_thread = False @@ -56,39 +51,30 @@ def __init__(self, datastore_path="/datastore", include_default_watches=True, ve # So when someone gives us a backup file to examine, we know exactly what code they were running. self.__data['build_sha'] = f.read() - try: - # @todo retest with ", encoding='utf-8'" - with open(self.json_store_path) as json_file: - from_disk = json.load(json_file) - - # @todo isnt there a way todo this dict.update recursively? - # Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore. - if 'watching' in from_disk: - self.__data['watching'].update(from_disk['watching']) - - if 'app_guid' in from_disk: - self.__data['app_guid'] = from_disk['app_guid'] - - if 'settings' in from_disk: - if 'headers' in from_disk['settings']: - self.__data['settings']['headers'].update(from_disk['settings']['headers']) + self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={})) - if 'requests' in from_disk['settings']: - self.__data['settings']['requests'].update(from_disk['settings']['requests']) - - if 'application' in from_disk['settings']: - self.__data['settings']['application'].update(from_disk['settings']['application']) + try: + import os + # First load global settings from the main JSON file if it exists + if os.path.isfile(self.json_store_path): + with open(self.json_store_path) as json_file: + from_disk = json.load(json_file) + + # Load app_guid and settings from the main JSON file + if 'app_guid' in from_disk: + self.__data['app_guid'] = from_disk['app_guid'] + + if 'settings' in from_disk: + if 'headers' in from_disk['settings']: + self.__data['settings']['headers'].update(from_disk['settings']['headers']) + + if 'requests' in from_disk['settings']: + self.__data['settings']['requests'].update(from_disk['settings']['requests']) + + if 'application' in from_disk['settings']: + self.__data['settings']['application'].update(from_disk['settings']['application']) - # Convert each existing watch back to the Watch.model object - for uuid, watch in self.__data['watching'].items(): - self.__data['watching'][uuid] = self.rehydrate_entity(default_dict=watch) - logger.info(f"Watching: {uuid} {watch['url']}") - # And for Tags also, should be Restock type because it has extra settings - # @todo make this smarter! - for uuid, tag in self.__data['settings']['application']['tags'].items(): - self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(default_dict=tag, processor_override='restock_diff') - logger.info(f"Tag: {uuid} {tag['title']}") # First time ran, Create the datastore. except (FileNotFoundError): @@ -107,6 +93,8 @@ def __init__(self, datastore_path="/datastore", include_default_watches=True, ve else: # Bump the update version by running updates + self.scan_load_watches() + self.scan_load_tags() self.run_updates() self.__data['version_tag'] = version_tag @@ -138,10 +126,53 @@ def __init__(self, datastore_path="/datastore", include_default_watches=True, ve secret = secrets.token_hex(16) self.__data['settings']['application']['api_access_token'] = secret - self.needs_write = True + def scan_load_watches(self): - # Finally start the thread that will manage periodic data saves to JSON - save_data_thread = threading.Thread(target=self.save_datastore).start() + # Now scan for individual watch.json files in the datastore directory + import pathlib + watch_jsons = list(pathlib.Path(self.datastore_path).rglob("*/watch.json")) + + for watch_file in watch_jsons: + # Extract UUID from the directory name (parent directory of watch.json) + uuid = watch_file.parent.name + + try: + with open(watch_file, 'r') as f: + watch_data = json.load(f) + # Create a Watch object and add it to the datastore + self.__data['watching'][uuid] = self.rehydrate_entity(default_dict=watch_data) + logger.info(f"Watching: {uuid} {watch_data.get('url')}") + + except Exception as e: + logger.error(f"Error loading watch from {watch_file}: {str(e)}") + continue + logger.debug(f"{len(self.__data['watching'])} watches loaded.") + + def scan_load_tags(self): + import pathlib + # Now scan for individual tag.json files in the tags directory + tags_path = os.path.join(self.datastore_path, 'tags') + if os.path.exists(tags_path): + tag_jsons = list(pathlib.Path(tags_path).rglob("*.json")) + + for tag_file in tag_jsons: + # Extract UUID from the directory name (parent directory of tag.json) + + try: + with open(tag_file, 'r') as f: + tag_data = json.load(f) + uuid = str(tag_file).replace('.json', '') + tag_data['uuid'] = uuid + # Create a Tag object and add it to the datastore + self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity( + default_dict=tag_data, + processor_override='restock_diff' + ) + logger.info(f"Tag: {uuid} {tag_data.get('title', 'No title found')}") + except Exception as e: + logger.error(f"Error loading tag from {tag_file}: {str(e)}") + continue + logger.debug(f"{len(self.__data['settings']['application']['tags'])} tags loaded.") def rehydrate_entity(self, default_dict: dict, processor_override=None): @@ -152,16 +183,17 @@ def rehydrate_entity(self, default_dict: dict, processor_override=None): watch_class = get_watch_model_for_processor(processor_override) default_dict['processor'] = processor_override entity = watch_class(datastore_path=self.datastore_path, default=default_dict) + entity.enable_saving() return entity def set_last_viewed(self, uuid, timestamp): logger.debug(f"Setting watch UUID: {uuid} last viewed to {int(timestamp)}") self.data['watching'][uuid].update({'last_viewed': int(timestamp)}) - self.needs_write = True + self.data['watching'][uuid].save_data() def remove_password(self): self.__data['settings']['application']['password'] = False - self.needs_write = True + self.save_settings() def update_watch(self, uuid, update_obj): """ @@ -171,21 +203,16 @@ def update_watch(self, uuid, update_obj): if not uuid in self.data['watching'].keys() or update_obj is None: return - with self.lock: - # Make sure we're working with a proper Watch object - watch = self.data['watching'].get(uuid) - - # Handle None values - they mean "delete this key" - keys_to_remove = [k for k, v in update_obj.items() if v is None] - for k in keys_to_remove: - if k in watch: - del watch[k] - del update_obj[k] - - # Deep merge with the rest - always_merger.merge(watch, update_obj) - - self.needs_write = True + # In python 3.9 we have the |= dict operator, but that still will lose data on nested structures... + for dict_key, d in self.generic_definition.items(): + if isinstance(d, dict): + if update_obj is not None and dict_key in update_obj: + self.__data['watching'][uuid][dict_key].update(update_obj[dict_key]) + del (update_obj[dict_key]) + + self.__data['watching'][uuid].update(update_obj) + self.__data['watching'][uuid].save_data() + @property def threshold_seconds(self): @@ -245,8 +272,6 @@ def delete(self, uuid): shutil.rmtree(path) del self.data['watching'][uuid] - self.needs_write_urgent = True - # Clone a watch by UUID def clone(self, uuid): url = self.data['watching'][uuid].get('url') @@ -266,7 +291,6 @@ def url_exists(self, url): # Remove a watchs data but keep the entry (URL etc) def clear_watch_history(self, uuid): self.__data['watching'][uuid].clear_watch() - self.needs_write_urgent = True def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True): import requests @@ -357,15 +381,11 @@ def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now= if not apply_extras.get('date_created'): apply_extras['date_created'] = int(time.time()) - - new_watch.update(apply_extras) new_watch.ensure_data_dir_exists() - self.__data['watching'][new_uuid] = new_watch - - - if write_to_disk_now: - self.sync_to_json() + new_watch.update(apply_extras) + self.__data['watching'][new_uuid] = new_watch + self.__data['watching'][new_uuid].save_data() logger.debug(f"Added '{url}'") return new_uuid @@ -379,58 +399,22 @@ def visualselector_data_is_ready(self, watch_uuid): return False - def sync_to_json(self): - logger.info("Saving JSON..") + def save_settings(self): + logger.info("Saving application settings...") try: - data = deepcopy(self.__data) - except RuntimeError as e: - # Try again in 15 seconds - time.sleep(15) - logger.error(f"! Data changed when writing to JSON, trying again.. {str(e)}") - self.sync_to_json() - return - else: - - try: - # Re #286 - First write to a temp file, then confirm it looks OK and rename it - # This is a fairly basic strategy to deal with the case that the file is corrupted, - # system was out of memory, out of RAM etc - with open(self.json_store_path+".tmp", 'w') as json_file: - json.dump(data, json_file, indent=4) - os.replace(self.json_store_path+".tmp", self.json_store_path) - except Exception as e: - logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}") - - self.needs_write = False - self.needs_write_urgent = False - - # Thread runner, this helps with thread/write issues when there are many operations that want to update the JSON - # by just running periodically in one thread, according to python, dict updates are threadsafe. - def save_datastore(self): - - while True: - if self.stop_thread: - # Suppressing "Logging error in Loguru Handler #0" during CICD. - # Not a meaningful difference for a real use-case just for CICD. - # the side effect is a "Shutting down datastore thread" message - # at the end of each test. - # But still more looking better. - import sys - logger.remove() - logger.add(sys.stderr) - - logger.info("Shutting down datastore thread") - return - - if self.needs_write or self.needs_write_urgent: - self.sync_to_json() - - # Once per minute is enough, more and it can cause high CPU usage - # better here is to use something like self.app.config.exit.wait(1), but we cant get to 'app' from here - for i in range(120): - time.sleep(0.5) - if self.stop_thread or self.needs_write_urgent: - break + # Only save app settings, not the watches or tags (they're saved individually) + data = {'settings': self.__data.get('settings')} + #data = deepcopy(self.__data) + + # Remove the watches from the main JSON file + if 'watching' in data: + del data['watching'] + + # Remove the tags from the main JSON file since they're saved individually now +# if 'settings' in data and 'application' in data['settings'] and 'tags' in data['settings']['application']: +# del data['settings']['application']['tags'] + except Exception as e: + x=1 # Go through the datastore path and remove any snapshots that are not mentioned in the index # This usually is not used, but can be handy. @@ -584,16 +568,17 @@ def add_tag(self, name): # Eventually almost everything todo with a watch will apply as a Tag # So we use the same model as a Watch - with self.lock: - from .model import Tag - new_tag = Tag.model(datastore_path=self.datastore_path, default={ - 'title': name.strip(), - 'date_created': int(time.time()) - }) + from .model import Tag + new_tag = Tag.model(datastore_path=self.datastore_path, default={ + 'title': name.strip(), + 'date_created': int(time.time()) + }) + + new_uuid = new_tag.get('uuid') - new_uuid = new_tag.get('uuid') + self.__data['settings']['application']['tags'][new_uuid] = new_tag + self.__data['settings']['application']['tags'][new_uuid].save_data() - self.__data['settings']['application']['tags'][new_uuid] = new_tag return new_uuid diff --git a/changedetectionio/tests/test_conditions.py b/changedetectionio/tests/test_conditions.py index 2f3637fe44e..51dde144948 100644 --- a/changedetectionio/tests/test_conditions.py +++ b/changedetectionio/tests/test_conditions.py @@ -50,7 +50,7 @@ def test_conditions_with_text_and_number(client, live_server): """Test that both text and number conditions work together with AND logic.""" set_original_response("50") -# live_server_setup(live_server) + #live_server_setup(live_server) test_url = url_for('test_endpoint', _external=True) diff --git a/changedetectionio/tests/test_restock_save_load_settings.py b/changedetectionio/tests/test_restock_save_load_settings.py index de56f38b090..8c8685e2aa0 100644 --- a/changedetectionio/tests/test_restock_save_load_settings.py +++ b/changedetectionio/tests/test_restock_save_load_settings.py @@ -50,7 +50,8 @@ def test_restock_settings_persistence(client, live_server): "headers": "", "restock_settings-price_change_min": 10, "restock_settings-price_change_threshold_percent": 5, - 'fetch_backend': "html_requests" + 'fetch_backend': "html_requests", + "processor" : 'restock_diff' }, follow_redirects=True ) @@ -69,8 +70,7 @@ def test_restock_settings_persistence(client, live_server): # This simulates shutting down and restarting the app datastore = client.application.config.get('DATASTORE') datastore.stop_thread = True - datastore.sync_to_json() # Force write to disk before recreating - + # Create a new datastore instance that will read from the saved JSON from changedetectionio import store new_datastore = store.ChangeDetectionStore(datastore_path="./test-datastore", include_default_watches=False) diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py index 052e449a7cd..83902f85622 100644 --- a/changedetectionio/update_worker.py +++ b/changedetectionio/update_worker.py @@ -527,7 +527,6 @@ def run(self): try: self.datastore.update_watch(uuid=uuid, update_obj=update_obj) - # Also save the snapshot on the first time checked, "last checked" will always be updated, so we just check history length. if changed_detected or not watch.history_n: @@ -587,6 +586,7 @@ def run(self): 'check_count': count }) + watch.save_data() self.current_uuid = None # Done self.q.task_done()