From 2039060b640189e250020e6e17db10b0a0730e7e Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Wed, 12 Jun 2024 18:01:18 +0200 Subject: [PATCH] [mod] revision of the settings_loader The intention of this PR is to modernize the settings_loader implementations. The concept is old (remember, this is partly from 2014): back then we only had one config file, whereas we have now had a folder with config files for a very long time. Callers can now load a YAML configuration from this folder as follows :: settings_loader.get_yaml_cfg('my-config.yml') - BTW this is a fix of #3557. - Further the `existing_filename_or_none` construct dates back to times when there was not yet a `pathlib.Path` in all Python versions we supported in the past. - Type hints have been added wherever appropriate At the same time, this patch should also be backward compatible and not introduce a new environment variable. The location of the folder with the configurations is still based on: SEARXNG_SETTINGS_PATH (which defaults to /etc/searxng/settings.yml) This means the default config folder is `/etc/searxng/`. ATTENTION: intended functional changes! If SEARXNG_SETTINGS_PATH was set and pointed to a non-existent file, the previous implementation silently loaded the default configuration. This behavior has been changed: if the file or folder does not exist, an EnvironmentError exception will be raised in the future. 
Closes: https://github.com/searxng/searxng/issues/3557 Signed-off-by: Markus Heiser --- docs/admin/settings/index.rst | 2 + docs/src/searx.settings.rst | 8 + manage | 4 +- searx/plugins/hostnames.py | 8 +- searx/settings_loader.py | 187 +++++++++++++------ searx/webapp.py | 4 +- tests/unit/settings/syntaxerror_settings.yml | 1 + tests/unit/test_settings_loader.py | 48 ++--- 8 files changed, 163 insertions(+), 99 deletions(-) create mode 100644 docs/src/searx.settings.rst diff --git a/docs/admin/settings/index.rst b/docs/admin/settings/index.rst index 005ee37e1..acc91dbdd 100644 --- a/docs/admin/settings/index.rst +++ b/docs/admin/settings/index.rst @@ -1,3 +1,5 @@ +.. _searxng settings.yml: + ======== Settings ======== diff --git a/docs/src/searx.settings.rst b/docs/src/searx.settings.rst new file mode 100644 index 000000000..1496c407c --- /dev/null +++ b/docs/src/searx.settings.rst @@ -0,0 +1,8 @@ +.. _searx.settings_loader: + +=============== +Settings Loader +=============== + +.. automodule:: searx.settings_loader + :members: diff --git a/manage b/manage index 33e2808e1..f83620cf5 100755 --- a/manage +++ b/manage @@ -54,7 +54,9 @@ fi YAMLLINT_FILES=() while IFS= read -r line; do - YAMLLINT_FILES+=("$line") + if [ "$line" != "tests/unit/settings/syntaxerror_settings.yml" ]; then + YAMLLINT_FILES+=("$line") + fi done <<< "$(git ls-files './tests/*.yml' './searx/*.yml' './utils/templates/etc/searxng/*.yml')" RST_FILES=( diff --git a/searx/plugins/hostnames.py b/searx/plugins/hostnames.py index 2fdf1669d..2783f23eb 100644 --- a/searx/plugins/hostnames.py +++ b/searx/plugins/hostnames.py @@ -96,7 +96,7 @@ from flask_babel import gettext from searx import settings from searx.plugins import logger -from searx.settings_loader import get_yaml_file +from searx.settings_loader import get_yaml_cfg name = gettext('Hostnames plugin') description = gettext('Rewrite hostnames, remove results or prioritize them based on the hostname') @@ -118,7 +118,7 @@ def 
_load_regular_expressions(settings_key): # load external file with configuration if isinstance(setting_value, str): - setting_value = get_yaml_file(setting_value) + setting_value = get_yaml_cfg(setting_value) if isinstance(setting_value, list): return {re.compile(r) for r in setting_value} @@ -163,10 +163,10 @@ def _matches_parsed_url(result, pattern): def on_result(_request, _search, result): for pattern, replacement in replacements.items(): if _matches_parsed_url(result, pattern): - logger.debug(result['url']) + # logger.debug(result['url']) result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc)) result['url'] = urlunparse(result[parsed]) - logger.debug(result['url']) + # logger.debug(result['url']) for url_field in _url_fields: if not result.get(url_field): diff --git a/searx/settings_loader.py b/searx/settings_loader.py index 6bf3465f0..e01f4439f 100644 --- a/searx/settings_loader.py +++ b/searx/settings_loader.py @@ -1,68 +1,116 @@ # SPDX-License-Identifier: AGPL-3.0-or-later -# pylint: disable=missing-module-docstring, too-many-branches +"""Implementations for loading configurations from YAML files. This essentially +includes the configuration of the (:ref:`SearXNG appl `) +server. The default configuration for the application server is loaded from the +:origin:`DEFAULT_SETTINGS_FILE `. This default +configuration can be completely replaced or :ref:`customized individually +` and the ``SEARXNG_SETTINGS_PATH`` environment +variable can be used to set the location from which the local customizations are +to be loaded. The rules used for this can be found in the +:py:obj:`get_user_cfg_folder` function. -from typing import Optional -from os import environ -from os.path import dirname, join, abspath, isfile +- By default, local configurations are expected in folder ``/etc/searxng`` from + where applications can load them with the :py:obj:`get_yaml_cfg` function. 
+ +- By default, customized :ref:`SearXNG appl ` settings are + expected in a file named ``settings.yml``. + +""" + +from __future__ import annotations + +import os.path from collections.abc import Mapping from itertools import filterfalse +from pathlib import Path import yaml from searx.exceptions import SearxSettingsException +searx_dir = os.path.abspath(os.path.dirname(__file__)) -searx_dir = abspath(dirname(__file__)) +SETTINGS_YAML = Path("settings.yml") +DEFAULT_SETTINGS_FILE = Path(searx_dir) / SETTINGS_YAML +"""The :origin:`searx/settings.yml` file with all the default settings.""" -def existing_filename_or_none(file_name: str) -> Optional[str]: - if isfile(file_name): - return file_name - return None - - -def load_yaml(file_name): +def load_yaml(file_name: str | Path): + """Load YAML config from a file.""" try: with open(file_name, 'r', encoding='utf-8') as settings_yaml: - return yaml.safe_load(settings_yaml) + return yaml.safe_load(settings_yaml) or {} except IOError as e: - raise SearxSettingsException(e, file_name) from e + raise SearxSettingsException(e, str(file_name)) from e except yaml.YAMLError as e: - raise SearxSettingsException(e, file_name) from e + raise SearxSettingsException(e, str(file_name)) from e -def get_yaml_file(file_name): - path = existing_filename_or_none(join(searx_dir, file_name)) - if path is None: - raise FileNotFoundError(f"File {file_name} does not exist!") +def get_yaml_cfg(file_name: str | Path) -> dict: + """Shortcut to load a YAML config from a file, located in the - return load_yaml(path) - - -def get_default_settings_path(): - return existing_filename_or_none(join(searx_dir, 'settings.yml')) - - -def get_user_settings_path() -> Optional[str]: - """Get an user settings file. - By descending priority: - 1. ``environ['SEARXNG_SETTINGS_PATH']`` - 2. ``/etc/searxng/settings.yml`` except if ``SEARXNG_DISABLE_ETC_SETTINGS`` is ``true`` or ``1`` - 3. 
``None`` + - :py:obj:`get_user_cfg_folder` or + - in the ``searx`` folder of the SearXNG installation """ - # check the environment variable SEARXNG_SETTINGS_PATH - # if the environment variable is defined, this is the last check - if 'SEARXNG_SETTINGS_PATH' in environ: - return existing_filename_or_none(environ['SEARXNG_SETTINGS_PATH']) + folder = get_user_cfg_folder() or Path(searx_dir) + fname = folder / file_name + if not fname.is_file(): + raise FileNotFoundError(f"File {fname} does not exist!") - # if SEARXNG_DISABLE_ETC_SETTINGS don't look any further - if environ.get('SEARXNG_DISABLE_ETC_SETTINGS', '').lower() in ('1', 'true'): - return None + return load_yaml(fname) - # check /etc/searxng/settings.yml - # (continue with other locations if the file is not found) - return existing_filename_or_none('/etc/searxng/settings.yml') + +def get_user_cfg_folder() -> Path | None: + """Returns folder where the local configurations are located. + + 1. If the ``SEARXNG_SETTINGS_PATH`` environment is set and points to a + folder (e.g. ``/etc/mysxng/``), all local configurations are expected in + this folder. The settings of the :ref:`SearXNG appl ` then expected in ``settings.yml`` + (e.g. ``/etc/mysxng/settings.yml``). + + 2. If the ``SEARXNG_SETTINGS_PATH`` environment is set and points to a file + (e.g. ``/etc/mysxng/myinstance.yml``), this file contains the settings of + the :ref:`SearXNG appl ` and the folder + (e.g. ``/etc/mysxng/``) is used for all other configurations. + + This type (``SEARXNG_SETTINGS_PATH`` points to a file) is suitable for + use cases in which different profiles of the :ref:`SearXNG appl ` are to be managed, such as in test scenarios. + + 3. If folder ``/etc/searxng`` exists, it is used. + + In case none of the above path exists, ``None`` is returned. In case of + environment ``SEARXNG_SETTINGS_PATH`` is set, but the (folder or file) does + not exists, a :py:obj:`EnvironmentError` is raised. 
+ + """ + + folder = None + settings_path = os.environ.get("SEARXNG_SETTINGS_PATH") + + # Disable default /etc/searxng is intended exclusively for internal testing purposes + # and is therefore not documented! + disable_etc = os.environ.get('SEARXNG_DISABLE_ETC_SETTINGS', '').lower() in ('1', 'true') + + if settings_path: + # rule 1. and 2. + settings_path = Path(settings_path) + if settings_path.is_dir(): + folder = settings_path + elif settings_path.is_file(): + folder = settings_path.parent + else: + raise EnvironmentError(1, f"{settings_path} not exists!", settings_path) + + if not folder and not disable_etc: + # default: rule 3. + folder = Path("/etc/searxng") + if not folder.is_dir(): + folder = None + + return folder def update_dict(default_dict, user_dict): @@ -74,7 +122,9 @@ def update_dict(default_dict, user_dict): return default_dict -def update_settings(default_settings, user_settings): +def update_settings(default_settings: dict, user_settings: dict): + # pylint: disable=too-many-branches + # merge everything except the engines for k, v in user_settings.items(): if k not in ('use_default_settings', 'engines'): @@ -124,6 +174,7 @@ def update_settings(default_settings, user_settings): def is_use_default_settings(user_settings): + use_default_settings = user_settings.get('use_default_settings') if use_default_settings is True: return True @@ -134,25 +185,37 @@ def is_use_default_settings(user_settings): raise ValueError('Invalid value for use_default_settings') -def load_settings(load_user_settings=True): - default_settings_path = get_default_settings_path() - user_settings_path = get_user_settings_path() - if user_settings_path is None or not load_user_settings: - # no user settings - return (load_yaml(default_settings_path), 'load the default settings from {}'.format(default_settings_path)) +def load_settings(load_user_settings=True) -> tuple[dict, str]: + """Function for loading the settings of the SearXNG application + (:ref:`settings.yml `).""" - # 
user settings - user_settings = load_yaml(user_settings_path) - if is_use_default_settings(user_settings): + msg = f"load the default settings from {DEFAULT_SETTINGS_FILE}" + cfg = load_yaml(DEFAULT_SETTINGS_FILE) + cfg_folder = get_user_cfg_folder() + + if not load_user_settings or not cfg_folder: + return cfg, msg + + settings_yml = os.environ.get("SEARXNG_SETTINGS_PATH") + if settings_yml and Path(settings_yml).is_file(): + # see get_user_cfg_folder() --> SEARXNG_SETTINGS_PATH points to a file + settings_yml = Path(settings_yml).name + else: + # see get_user_cfg_folder() --> SEARXNG_SETTINGS_PATH points to a folder + settings_yml = SETTINGS_YAML + + cfg_file = cfg_folder / settings_yml + if not cfg_file.exists(): + return cfg, msg + + msg = f"load the user settings from {cfg_file}" + user_cfg = load_yaml(cfg_file) + + if is_use_default_settings(user_cfg): # the user settings are merged with the default configuration - default_settings = load_yaml(default_settings_path) - update_settings(default_settings, user_settings) - return ( - default_settings, - 'merge the default settings ( {} ) and the user settings ( {} )'.format( - default_settings_path, user_settings_path - ), - ) + msg = f"merge the default settings ( {DEFAULT_SETTINGS_FILE} ) and the user settings ( {cfg_file} )" + update_settings(cfg, user_cfg) + else: + cfg = user_cfg - # the user settings, fully replace the default configuration - return (user_settings, 'load the user settings from {}'.format(user_settings_path)) + return cfg, msg diff --git a/searx/webapp.py b/searx/webapp.py index b2b6a0bb5..4ecc9559e 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -61,7 +61,7 @@ from searx.botdetection import link_token from searx.data import ENGINE_DESCRIPTIONS from searx.results import Timing from searx.settings_defaults import OUTPUT_FORMATS -from searx.settings_loader import get_default_settings_path +from searx.settings_loader import DEFAULT_SETTINGS_FILE from searx.exceptions import 
SearxParameterException from searx.engines import ( DEFAULT_CATEGORY, @@ -1347,7 +1347,7 @@ def run(): port=settings['server']['port'], host=settings['server']['bind_address'], threaded=True, - extra_files=[get_default_settings_path()], + extra_files=[DEFAULT_SETTINGS_FILE], ) diff --git a/tests/unit/settings/syntaxerror_settings.yml b/tests/unit/settings/syntaxerror_settings.yml index 85cc979c0..fa9f3e5a3 100644 --- a/tests/unit/settings/syntaxerror_settings.yml +++ b/tests/unit/settings/syntaxerror_settings.yml @@ -1,2 +1,3 @@ Test: "**********" + xxx diff --git a/tests/unit/test_settings_loader.py b/tests/unit/test_settings_loader.py index 088767597..281b11c16 100644 --- a/tests/unit/test_settings_loader.py +++ b/tests/unit/test_settings_loader.py @@ -1,7 +1,9 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # pylint: disable=missing-module-docstring -from os.path import dirname, join, abspath +from pathlib import Path + +import os from unittest.mock import patch from searx.exceptions import SearxSettingsException @@ -9,7 +11,8 @@ from searx import settings_loader from tests import SearxTestCase -test_dir = abspath(dirname(__file__)) +def _settings(f_name): + return str(Path(__file__).parent.absolute() / "settings" / f_name) class TestLoad(SearxTestCase): # pylint: disable=missing-class-docstring @@ -18,16 +21,9 @@ class TestLoad(SearxTestCase): # pylint: disable=missing-class-docstring settings_loader.load_yaml('/dev/zero') with self.assertRaises(SearxSettingsException): - settings_loader.load_yaml(join(test_dir, '/settings/syntaxerror_settings.yml')) + settings_loader.load_yaml(_settings("syntaxerror_settings.yml")) - with self.assertRaises(SearxSettingsException): - settings_loader.load_yaml(join(test_dir, '/settings/empty_settings.yml')) - - def test_existing_filename_or_none(self): - self.assertIsNone(settings_loader.existing_filename_or_none('/dev/zero')) - - bad_settings_path = join(test_dir, 'settings/syntaxerror_settings.yml') - 
self.assertEqual(settings_loader.existing_filename_or_none(bad_settings_path), bad_settings_path) + self.assertEqual(settings_loader.load_yaml(_settings("empty_settings.yml")), {}) class TestDefaultSettings(SearxTestCase): # pylint: disable=missing-class-docstring @@ -55,24 +51,22 @@ class TestUserSettings(SearxTestCase): # pylint: disable=missing-class-docstrin self.assertFalse(settings_loader.is_use_default_settings({'use_default_settings': 0})) def test_user_settings_not_found(self): - with patch.dict(settings_loader.environ, {'SEARXNG_SETTINGS_PATH': '/dev/null'}): - settings, msg = settings_loader.load_settings() - self.assertTrue(msg.startswith('load the default settings from')) - self.assertEqual(settings['server']['secret_key'], "ultrasecretkey") + with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("not_exists.yml")}): + with self.assertRaises(EnvironmentError): + _s, _m = settings_loader.load_settings() + with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': "/folder/not/exists"}): + with self.assertRaises(EnvironmentError): + _s, _m = settings_loader.load_settings() def test_user_settings(self): - with patch.dict( - settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_simple.yml')} - ): + with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings_simple.yml")}): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('merge the default settings')) self.assertEqual(settings['server']['secret_key'], "user_secret_key") self.assertEqual(settings['server']['default_http_headers']['Custom-Header'], "Custom-Value") def test_user_settings_remove(self): - with patch.dict( - settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_remove.yml')} - ): + with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings_remove.yml")}): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('merge the default 
settings')) self.assertEqual(settings['server']['secret_key'], "user_secret_key") @@ -83,9 +77,7 @@ class TestUserSettings(SearxTestCase): # pylint: disable=missing-class-docstrin self.assertIn('wikipedia', engine_names) def test_user_settings_remove2(self): - with patch.dict( - settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_remove2.yml')} - ): + with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings_remove2.yml")}): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('merge the default settings')) self.assertEqual(settings['server']['secret_key'], "user_secret_key") @@ -101,9 +93,7 @@ class TestUserSettings(SearxTestCase): # pylint: disable=missing-class-docstrin self.assertEqual(newengine[0]['engine'], 'dummy') def test_user_settings_keep_only(self): - with patch.dict( - settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings_keep_only.yml')} - ): + with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings_keep_only.yml")}): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('merge the default settings')) engine_names = [engine['name'] for engine in settings['engines']] @@ -112,9 +102,7 @@ class TestUserSettings(SearxTestCase): # pylint: disable=missing-class-docstrin self.assertEqual(len(settings['engines'][2]), 1) def test_custom_settings(self): - with patch.dict( - settings_loader.environ, {'SEARXNG_SETTINGS_PATH': join(test_dir, 'settings/user_settings.yml')} - ): + with patch.dict(os.environ, {'SEARXNG_SETTINGS_PATH': _settings("user_settings.yml")}): settings, msg = settings_loader.load_settings() self.assertTrue(msg.startswith('load the user settings from')) self.assertEqual(settings['server']['port'], 9000)