1
0
Fork 0

Compare commits

...

7 Commits

Author SHA1 Message Date
Alexandre Flament 26d2e9c9aa
Merge e826a71c2b into 383d873597 2024-05-09 18:04:14 +02:00
Bnyro 383d873597 [fix] unit converter plugin: can't be disabled in settings 2024-05-09 17:40:37 +02:00
Markus Heiser fb32425d78 [mod] yacy engine: pick base_url randomly from a list of instances
Inspired by post [1] in the disscussion we had, while yacy.searchlab.eu was
broken.

[1] https://github.com/searxng/searxng/issues/3428#issuecomment-2101080101

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-05-09 17:29:15 +02:00
Bnyro 72be98e12f [feat] plugins: new calculator plugin 2024-05-09 17:23:38 +02:00
Markus Heiser 742303d030 [mod] improve unit converter plugin
- l10n support: parse and format decimal numbers by babel
- ability to add additional units
- improved unit detection (symbols are not unique)
- support for alias units (0,010C to F --> 32,018 °F)

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-05-09 17:16:31 +02:00
Markus Heiser 63cf80aae5 [fix] docutils dependencies (docutils>=0.21.2)
Another trip into the hell of dependencies: docutils tends to put major changes
in minor patches: the executables have been renamed / e.g.

     rst2html.py --> rts2html

so we have to use docutils at least from version 0.21.2, but this version of
docutils is only supported by myst-parser from version 3.0.1 on.

Additionally, docutils decided to drop python 3.8 in version 0.21 [1]

Further, linuxdoc needed an update to cope with docutils 0.21 [2]

[1] https://docutils.sourceforge.io/RELEASE-NOTES.html#release-0-21-2024-04-09
[2] https://github.com/return42/linuxdoc/pull/36

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2024-05-09 17:15:21 +02:00
Alexandre Flament e826a71c2b WIP: use the package botdetection 2024-05-05 18:01:25 +00:00
25 changed files with 480 additions and 1330 deletions

View File

@ -0,0 +1,9 @@
.. _unit converter plugin:
=====================
Unit converter plugin
=====================
.. automodule:: searx.plugins.unit_converter
:members:

View File

@ -14,9 +14,12 @@ sphinx-tabs==3.4.5
sphinxcontrib-programoutput==0.17
sphinx-autobuild==2021.3.14
sphinx-notfound-page==1.0.0
myst-parser==2.0.0
linuxdoc==20231020
myst-parser==3.0.1
linuxdoc==20240509
aiounittest==1.4.2
yamllint==1.35.1
wlc==1.14
coloredlogs==15.0.1
docutils<=0.21; python_version == '3.8'
docutils>=0.21.2; python_version > '3.8'

View File

@ -16,3 +16,4 @@ redis==5.0.4
markdown-it-py==3.0.0
fasttext-predict==0.9.2.2
pytomlpp==1.0.13; python_version < '3.11'
botdetection @ git+ssh://git@github.com/dalf/botdetection.git@alt_impl

View File

@ -1,22 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""".. _botdetection src:
Implementations used for bot detection.
"""
from ._helpers import dump_request
from ._helpers import get_real_ip
from ._helpers import get_network
from ._helpers import too_many_requests
__all__ = ['dump_request', 'get_network', 'get_real_ip', 'too_many_requests']
redis_client = None
cfg = None
def init(_cfg, _redis_client):
global redis_client, cfg # pylint: disable=global-statement
redis_client = _redis_client
cfg = _cfg

View File

@ -1,128 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring, invalid-name
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
IPv4Address,
IPv6Address,
ip_network,
)
import flask
import werkzeug
from searx import logger
from . import config
logger = logger.getChild('botdetection')
def dump_request(request: flask.Request):
return (
request.path
+ " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For')
+ " || X-Real-IP: %s" % request.headers.get('X-Real-IP')
+ " || form: %s" % request.form
+ " || Accept: %s" % request.headers.get('Accept')
+ " || Accept-Language: %s" % request.headers.get('Accept-Language')
+ " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding')
+ " || Content-Type: %s" % request.headers.get('Content-Type')
+ " || Content-Length: %s" % request.headers.get('Content-Length')
+ " || Connection: %s" % request.headers.get('Connection')
+ " || User-Agent: %s" % request.headers.get('User-Agent')
)
def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkzeug.Response | None:
"""Returns a HTTP 429 response object and writes a ERROR message to the
'botdetection' logger. This function is used in part by the filter methods
to return the default ``Too Many Requests`` response.
"""
logger.debug("BLOCK %s: %s", network.compressed, log_msg)
return flask.make_response(('Too Many Requests', 429))
def get_network(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> IPv4Network | IPv6Network:
"""Returns the (client) network of whether the real_ip is part of."""
if real_ip.version == 6:
prefix = cfg['real_ip.ipv6_prefix']
else:
prefix = cfg['real_ip.ipv4_prefix']
network = ip_network(f"{real_ip}/{prefix}", strict=False)
# logger.debug("get_network(): %s", network.compressed)
return network
_logged_errors = []
def _log_error_only_once(err_msg):
if err_msg not in _logged_errors:
logger.error(err_msg)
_logged_errors.append(err_msg)
def get_real_ip(request: flask.Request) -> str:
"""Returns real IP of the request. Since not all proxies set all the HTTP
headers and incoming headers can be faked it may happen that the IP cannot
be determined correctly.
.. sidebar:: :py:obj:`flask.Request.remote_addr`
SearXNG uses Werkzeug's ProxyFix_ (with it default ``x_for=1``).
This function tries to get the remote IP in the order listed below,
additional some tests are done and if inconsistencies or errors are
detected, they are logged.
The remote IP of the request is taken from (first match):
- X-Forwarded-For_ header
- `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__
- :py:obj:`flask.Request.remote_addr`
.. _ProxyFix:
https://werkzeug.palletsprojects.com/middleware/proxy_fix/
.. _X-Forwarded-For:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
"""
forwarded_for = request.headers.get("X-Forwarded-For")
real_ip = request.headers.get('X-Real-IP')
remote_addr = request.remote_addr
# logger.debug(
# "X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr
# )
if not forwarded_for:
_log_error_only_once("X-Forwarded-For header is not set!")
else:
from . import cfg # pylint: disable=import-outside-toplevel, cyclic-import
forwarded_for = [x.strip() for x in forwarded_for.split(',')]
x_for: int = cfg['real_ip.x_for'] # type: ignore
forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
if not real_ip:
_log_error_only_once("X-Real-IP header is not set!")
if forwarded_for and real_ip and forwarded_for != real_ip:
logger.warning("IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)", real_ip, forwarded_for)
if forwarded_for and remote_addr and forwarded_for != remote_addr:
logger.warning(
"IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)", remote_addr, forwarded_for
)
if real_ip and remote_addr and real_ip != remote_addr:
logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)
request_ip = forwarded_for or real_ip or remote_addr or '0.0.0.0'
# logger.debug("get_real_ip() -> %s", request_ip)
return request_ip

View File

@ -1,400 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Configuration class :py:class:`Config` with deep-update, schema validation
and deprecated names.
The :py:class:`Config` class implements a configuration that is based on
structured dictionaries. The configuration schema is defined in a dictionary
structure and the configuration data is given in a dictionary structure.
"""
from __future__ import annotations
from typing import Any
import copy
import typing
import logging
import pathlib
try:
import tomllib
pytomlpp = None
USE_TOMLLIB = True
except ImportError:
import pytomlpp
tomllib = None
USE_TOMLLIB = False
__all__ = ['Config', 'UNSET', 'SchemaIssue']
log = logging.getLogger(__name__)
class FALSE:
"""Class of ``False`` singelton"""
# pylint: disable=multiple-statements
def __init__(self, msg):
self.msg = msg
def __bool__(self):
return False
def __str__(self):
return self.msg
__repr__ = __str__
UNSET = FALSE('<UNSET>')
class SchemaIssue(ValueError):
"""Exception to store and/or raise a message from a schema issue."""
def __init__(self, level: typing.Literal['warn', 'invalid'], msg: str):
self.level = level
super().__init__(msg)
def __str__(self):
return f"[cfg schema {self.level}] {self.args[0]}"
class Config:
"""Base class used for configuration"""
UNSET = UNSET
@classmethod
def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict) -> Config:
# init schema
log.debug("load schema file: %s", schema_file)
cfg = cls(cfg_schema=toml_load(schema_file), deprecated=deprecated)
if not cfg_file.exists():
log.warning("missing config file: %s", cfg_file)
return cfg
# load configuration
log.debug("load config file: %s", cfg_file)
upd_cfg = toml_load(cfg_file)
is_valid, issue_list = cfg.validate(upd_cfg)
for msg in issue_list:
log.error(str(msg))
if not is_valid:
raise TypeError(f"schema of {cfg_file} is invalid!")
cfg.update(upd_cfg)
return cfg
def __init__(self, cfg_schema: typing.Dict, deprecated: typing.Dict[str, str]):
"""Construtor of class Config.
:param cfg_schema: Schema of the configuration
:param deprecated: dictionary that maps deprecated configuration names to a messages
These values are needed for validation, see :py:obj:`validate`.
"""
self.cfg_schema = cfg_schema
self.deprecated = deprecated
self.cfg = copy.deepcopy(cfg_schema)
def __getitem__(self, key: str) -> Any:
return self.get(key)
def validate(self, cfg: dict):
"""Validation of dictionary ``cfg`` on :py:obj:`Config.SCHEMA`.
Validation is done by :py:obj:`validate`."""
return validate(self.cfg_schema, cfg, self.deprecated)
def update(self, upd_cfg: dict):
"""Update this configuration by ``upd_cfg``."""
dict_deepupdate(self.cfg, upd_cfg)
def default(self, name: str):
"""Returns default value of field ``name`` in ``self.cfg_schema``."""
return value(name, self.cfg_schema)
def get(self, name: str, default: Any = UNSET, replace: bool = True) -> Any:
"""Returns the value to which ``name`` points in the configuration.
If there is no such ``name`` in the config and the ``default`` is
:py:obj:`UNSET`, a :py:obj:`KeyError` is raised.
"""
parent = self._get_parent_dict(name)
val = parent.get(name.split('.')[-1], UNSET)
if val is UNSET:
if default is UNSET:
raise KeyError(name)
val = default
if replace and isinstance(val, str):
val = val % self
return val
def set(self, name: str, val):
"""Set the value to which ``name`` points in the configuration.
If there is no such ``name`` in the config, a :py:obj:`KeyError` is
raised.
"""
parent = self._get_parent_dict(name)
parent[name.split('.')[-1]] = val
def _get_parent_dict(self, name):
parent_name = '.'.join(name.split('.')[:-1])
if parent_name:
parent = value(parent_name, self.cfg)
else:
parent = self.cfg
if (parent is UNSET) or (not isinstance(parent, dict)):
raise KeyError(parent_name)
return parent
def path(self, name: str, default=UNSET):
"""Get a :py:class:`pathlib.Path` object from a config string."""
val = self.get(name, default)
if val is UNSET:
if default is UNSET:
raise KeyError(name)
return default
return pathlib.Path(str(val))
def pyobj(self, name, default=UNSET):
"""Get python object refered by full qualiffied name (FQN) in the config
string."""
fqn = self.get(name, default)
if fqn is UNSET:
if default is UNSET:
raise KeyError(name)
return default
(modulename, name) = str(fqn).rsplit('.', 1)
m = __import__(modulename, {}, {}, [name], 0)
return getattr(m, name)
def toml_load(file_name):
if USE_TOMLLIB:
# Python >= 3.11
try:
with open(file_name, "rb") as f:
return tomllib.load(f)
except tomllib.TOMLDecodeError as exc:
msg = str(exc).replace('\t', '').replace('\n', ' ')
log.error("%s: %s", file_name, msg)
raise
# fallback to pytomlpp for Python < 3.11
try:
return pytomlpp.load(file_name)
except pytomlpp.DecodeError as exc:
msg = str(exc).replace('\t', '').replace('\n', ' ')
log.error("%s: %s", file_name, msg)
raise
# working with dictionaries
def value(name: str, data_dict: dict):
"""Returns the value to which ``name`` points in the ``dat_dict``.
.. code: python
>>> data_dict = {
"foo": {"bar": 1 },
"bar": {"foo": 2 },
"foobar": [1, 2, 3],
}
>>> value('foobar', data_dict)
[1, 2, 3]
>>> value('foo.bar', data_dict)
1
>>> value('foo.bar.xxx', data_dict)
<UNSET>
"""
ret_val = data_dict
for part in name.split('.'):
if isinstance(ret_val, dict):
ret_val = ret_val.get(part, UNSET)
if ret_val is UNSET:
break
return ret_val
def validate(
schema_dict: typing.Dict, data_dict: typing.Dict, deprecated: typing.Dict[str, str]
) -> typing.Tuple[bool, list]:
"""Deep validation of dictionary in ``data_dict`` against dictionary in
``schema_dict``. Argument deprecated is a dictionary that maps deprecated
configuration names to a messages::
deprecated = {
"foo.bar" : "config 'foo.bar' is deprecated, use 'bar.foo'",
"..." : "..."
}
The function returns a python tuple ``(is_valid, issue_list)``:
``is_valid``:
A bool value indicating ``data_dict`` is valid or not.
``issue_list``:
A list of messages (:py:obj:`SchemaIssue`) from the validation::
[schema warn] data_dict: deprecated 'fontlib.foo': <DEPRECATED['foo.bar']>
[schema invalid] data_dict: key unknown 'fontlib.foo'
[schema invalid] data_dict: type mismatch 'fontlib.foo': expected ..., is ...
If ``schema_dict`` or ``data_dict`` is not a dictionary type a
:py:obj:`SchemaIssue` is raised.
"""
names = []
is_valid = True
issue_list = []
if not isinstance(schema_dict, dict):
raise SchemaIssue('invalid', "schema_dict is not a dict type")
if not isinstance(data_dict, dict):
raise SchemaIssue('invalid', f"data_dict issue{'.'.join(names)} is not a dict type")
is_valid, issue_list = _validate(names, issue_list, schema_dict, data_dict, deprecated)
return is_valid, issue_list
def _validate(
names: typing.List,
issue_list: typing.List,
schema_dict: typing.Dict,
data_dict: typing.Dict,
deprecated: typing.Dict[str, str],
) -> typing.Tuple[bool, typing.List]:
is_valid = True
for key, data_value in data_dict.items():
names.append(key)
name = '.'.join(names)
deprecated_msg = deprecated.get(name)
# print("XXX %s: key %s // data_value: %s" % (name, key, data_value))
if deprecated_msg:
issue_list.append(SchemaIssue('warn', f"data_dict '{name}': deprecated - {deprecated_msg}"))
schema_value = value(name, schema_dict)
# print("YYY %s: key %s // schema_value: %s" % (name, key, schema_value))
if schema_value is UNSET:
if not deprecated_msg:
issue_list.append(SchemaIssue('invalid', f"data_dict '{name}': key unknown in schema_dict"))
is_valid = False
elif type(schema_value) != type(data_value): # pylint: disable=unidiomatic-typecheck
issue_list.append(
SchemaIssue(
'invalid',
(f"data_dict: type mismatch '{name}':" f" expected {type(schema_value)}, is: {type(data_value)}"),
)
)
is_valid = False
elif isinstance(data_value, dict):
_valid, _ = _validate(names, issue_list, schema_dict, data_value, deprecated)
is_valid = is_valid and _valid
names.pop()
return is_valid, issue_list
def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
"""Deep-update of dictionary in ``base_dict`` by dictionary in ``upd_dict``.
For each ``upd_key`` & ``upd_val`` pair in ``upd_dict``:
0. If types of ``base_dict[upd_key]`` and ``upd_val`` do not match raise a
:py:obj:`TypeError`.
1. If ``base_dict[upd_key]`` is a dict: recursively deep-update it by ``upd_val``.
2. If ``base_dict[upd_key]`` not exist: set ``base_dict[upd_key]`` from a
(deep-) copy of ``upd_val``.
3. If ``upd_val`` is a list, extend list in ``base_dict[upd_key]`` by the
list in ``upd_val``.
4. If ``upd_val`` is a set, update set in ``base_dict[upd_key]`` by set in
``upd_val``.
"""
# pylint: disable=too-many-branches
if not isinstance(base_dict, dict):
raise TypeError("argument 'base_dict' is not a ditionary type")
if not isinstance(upd_dict, dict):
raise TypeError("argument 'upd_dict' is not a ditionary type")
if names is None:
names = []
for upd_key, upd_val in upd_dict.items():
# For each upd_key & upd_val pair in upd_dict:
if isinstance(upd_val, dict):
if upd_key in base_dict:
# if base_dict[upd_key] exists, recursively deep-update it
if not isinstance(base_dict[upd_key], dict):
raise TypeError(f"type mismatch {'.'.join(names)}: is not a dict type in base_dict")
dict_deepupdate(
base_dict[upd_key],
upd_val,
names
+ [
upd_key,
],
)
else:
# if base_dict[upd_key] not exist, set base_dict[upd_key] from deepcopy of upd_val
base_dict[upd_key] = copy.deepcopy(upd_val)
elif isinstance(upd_val, list):
if upd_key in base_dict:
# if base_dict[upd_key] exists, base_dict[up_key] is extended by
# the list from upd_val
if not isinstance(base_dict[upd_key], list):
raise TypeError(f"type mismatch {'.'.join(names)}: is not a list type in base_dict")
base_dict[upd_key].extend(upd_val)
else:
# if base_dict[upd_key] doesn't exists, set base_dict[key] from a deepcopy of the
# list in upd_val.
base_dict[upd_key] = copy.deepcopy(upd_val)
elif isinstance(upd_val, set):
if upd_key in base_dict:
# if base_dict[upd_key] exists, base_dict[up_key] is updated by the set in upd_val
if not isinstance(base_dict[upd_key], set):
raise TypeError(f"type mismatch {'.'.join(names)}: is not a set type in base_dict")
base_dict[upd_key].update(upd_val.copy())
else:
# if base_dict[upd_key] doesn't exists, set base_dict[upd_key] from a copy of the
# set in upd_val
base_dict[upd_key] = upd_val.copy()
else:
# for any other type of upd_val replace or add base_dict[upd_key] by a copy
# of upd_val
base_dict[upd_key] = copy.copy(upd_val)

View File

@ -1,38 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Method ``http_accept``
----------------------
The ``http_accept`` method evaluates a request as the request of a bot if the
Accept_ header ..
- did not contain ``text/html``
.. _Accept:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept
"""
# pylint: disable=unused-argument
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
)
import flask
import werkzeug
from . import config
from ._helpers import too_many_requests
def filter_request(
network: IPv4Network | IPv6Network,
request: flask.Request,
cfg: config.Config,
) -> werkzeug.Response | None:
if 'text/html' not in request.accept_mimetypes:
return too_many_requests(network, "HTTP header Accept did not contain text/html")
return None

View File

@ -1,40 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Method ``http_accept_encoding``
-------------------------------
The ``http_accept_encoding`` method evaluates a request as the request of a
bot if the Accept-Encoding_ header ..
- did not contain ``gzip`` AND ``deflate`` (if both values are missed)
- did not contain ``text/html``
.. _Accept-Encoding:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
"""
# pylint: disable=unused-argument
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
)
import flask
import werkzeug
from . import config
from ._helpers import too_many_requests
def filter_request(
network: IPv4Network | IPv6Network,
request: flask.Request,
cfg: config.Config,
) -> werkzeug.Response | None:
accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
if not ('gzip' in accept_list or 'deflate' in accept_list):
return too_many_requests(network, "HTTP header Accept-Encoding did not contain gzip nor deflate")
return None

View File

@ -1,34 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Method ``http_accept_language``
-------------------------------
The ``http_accept_language`` method evaluates a request as the request of a bot
if the Accept-Language_ header is unset.
.. _Accept-Language:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
"""
# pylint: disable=unused-argument
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
)
import flask
import werkzeug
from . import config
from ._helpers import too_many_requests
def filter_request(
network: IPv4Network | IPv6Network,
request: flask.Request,
cfg: config.Config,
) -> werkzeug.Response | None:
if request.headers.get('Accept-Language', '').strip() == '':
return too_many_requests(network, "missing HTTP header Accept-Language")
return None

View File

@ -1,36 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Method ``http_connection``
--------------------------
The ``http_connection`` method evaluates a request as the request of a bot if
the Connection_ header is set to ``close``.
.. _Connection:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
"""
# pylint: disable=unused-argument
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
)
import flask
import werkzeug
from . import config
from ._helpers import too_many_requests
def filter_request(
network: IPv4Network | IPv6Network,
request: flask.Request,
cfg: config.Config,
) -> werkzeug.Response | None:
if request.headers.get('Connection', '').strip() == 'close':
return too_many_requests(network, "HTTP header 'Connection=close")
return None

View File

@ -1,66 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Method ``http_user_agent``
--------------------------
The ``http_user_agent`` method evaluates a request as the request of a bot if
the User-Agent_ header is unset or matches the regular expression
:py:obj:`USER_AGENT`.
.. _User-Agent:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
"""
# pylint: disable=unused-argument
from __future__ import annotations
import re
from ipaddress import (
IPv4Network,
IPv6Network,
)
import flask
import werkzeug
from . import config
from ._helpers import too_many_requests
USER_AGENT = (
r'('
+ r'unknown'
+ r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
+ r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
+ r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot'
+ r'|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
+ r'|ZmEu|BLEXBot|bitlybot|HeadlessChrome'
# unmaintained Farside instances
+ r'|'
+ re.escape(r'Mozilla/5.0 (compatible; Farside/0.1.0; +https://farside.link)')
# other bots and client to block
+ '|.*PetalBot.*'
+ r')'
)
"""Regular expression that matches to User-Agent_ from known *bots*"""
_regexp = None
def regexp_user_agent():
global _regexp # pylint: disable=global-statement
if not _regexp:
_regexp = re.compile(USER_AGENT)
return _regexp
def filter_request(
network: IPv4Network | IPv6Network,
request: flask.Request,
cfg: config.Config,
) -> werkzeug.Response | None:
user_agent = request.headers.get('User-Agent', 'unknown')
if regexp_user_agent().match(user_agent):
return too_many_requests(network, f"bot detected, HTTP header User-Agent: {user_agent}")
return None

View File

@ -1,146 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""".. _botdetection.ip_limit:
Method ``ip_limit``
-------------------
The ``ip_limit`` method counts request from an IP in *sliding windows*. If
there are to many requests in a sliding window, the request is evaluated as a
bot request. This method requires a redis DB and needs a HTTP X-Forwarded-For_
header. To take privacy only the hash value of an IP is stored in the redis DB
and at least for a maximum of 10 minutes.
The :py:obj:`.link_token` method can be used to investigate whether a request is
*suspicious*. To activate the :py:obj:`.link_token` method in the
:py:obj:`.ip_limit` method add the following configuration:
.. code:: toml
[botdetection.ip_limit]
link_token = true
If the :py:obj:`.link_token` method is activated and a request is *suspicious*
the request rates are reduced:
- :py:obj:`BURST_MAX` -> :py:obj:`BURST_MAX_SUSPICIOUS`
- :py:obj:`LONG_MAX` -> :py:obj:`LONG_MAX_SUSPICIOUS`
To intercept bots that get their IPs from a range of IPs, there is a
:py:obj:`SUSPICIOUS_IP_WINDOW`. In this window the suspicious IPs are stored
for a longer time. IPs stored in this sliding window have a maximum of
:py:obj:`SUSPICIOUS_IP_MAX` accesses before they are blocked. As soon as the IP
makes a request that is not suspicious, the sliding window for this IP is
dropped.
.. _X-Forwarded-For:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
"""
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
)
import flask
import werkzeug
from searx import redisdb
from searx.redislib import incr_sliding_window, drop_counter
from . import link_token
from . import config
from ._helpers import (
too_many_requests,
logger,
)
logger = logger.getChild('ip_limit')
BURST_WINDOW = 20
"""Time (sec) before sliding window for *burst* requests expires."""
BURST_MAX = 15
"""Maximum requests from one IP in the :py:obj:`BURST_WINDOW`"""
BURST_MAX_SUSPICIOUS = 2
"""Maximum of suspicious requests from one IP in the :py:obj:`BURST_WINDOW`"""
LONG_WINDOW = 600
"""Time (sec) before the longer sliding window expires."""
LONG_MAX = 150
"""Maximum requests from one IP in the :py:obj:`LONG_WINDOW`"""
LONG_MAX_SUSPICIOUS = 10
"""Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`"""
API_WONDOW = 3600
"""Time (sec) before sliding window for API requests (format != html) expires."""
API_MAX = 4
"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
SUSPICIOUS_IP_WINDOW = 3600 * 24 * 30
"""Time (sec) before sliding window for one suspicious IP expires."""
SUSPICIOUS_IP_MAX = 3
"""Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`."""
def filter_request(
network: IPv4Network | IPv6Network,
request: flask.Request,
cfg: config.Config,
) -> werkzeug.Response | None:
# pylint: disable=too-many-return-statements
redis_client = redisdb.client()
if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']:
logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed)
return None
if request.args.get('format', 'html') != 'html':
c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + network.compressed, API_WONDOW)
if c > API_MAX:
return too_many_requests(network, "too many request in API_WINDOW")
if cfg['botdetection.ip_limit.link_token']:
suspicious = link_token.is_suspicious(network, request, True)
if not suspicious:
# this IP is no longer suspicious: release ip again / delete the counter of this IP
drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed)
return None
# this IP is suspicious: count requests from this IP
c = incr_sliding_window(
redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed, SUSPICIOUS_IP_WINDOW
)
if c > SUSPICIOUS_IP_MAX:
logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", network)
return flask.redirect(flask.url_for('index'), code=302)
c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
if c > BURST_MAX_SUSPICIOUS:
return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)")
c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
if c > LONG_MAX_SUSPICIOUS:
return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)")
return None
# vanilla limiter without extensions counts BURST_MAX and LONG_MAX
c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
if c > BURST_MAX:
return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX)")
c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
if c > LONG_MAX:
return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX)")
return None

View File

@ -1,84 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
""".. _botdetection.ip_lists:
Method ``ip_lists``
-------------------
The ``ip_lists`` method implements IP :py:obj:`block- <block_ip>` and
:py:obj:`pass-lists <pass_ip>`.
.. code:: toml
[botdetection.ip_lists]
pass_ip = [
'167.235.158.251', # IPv4 of check.searx.space
'192.168.0.0/16', # IPv4 private network
'fe80::/10' # IPv6 linklocal
]
block_ip = [
'93.184.216.34', # IPv4 of example.org
'257.1.1.1', # invalid IP --> will be ignored, logged in ERROR class
]
"""
# pylint: disable=unused-argument
from __future__ import annotations
from typing import Tuple
from ipaddress import (
ip_network,
IPv4Address,
IPv6Address,
)
from . import config
from ._helpers import logger
logger = logger.getChild('ip_limit')
SEARXNG_ORG = [
# https://github.com/searxng/searxng/pull/2484#issuecomment-1576639195
'167.235.158.251', # IPv4 check.searx.space
'2a01:04f8:1c1c:8fc2::/64', # IPv6 check.searx.space
]
"""Passlist of IPs from the SearXNG organization, e.g. `check.searx.space`."""
def pass_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
"""Checks if the IP on the subnet is in one of the members of the
``botdetection.ip_lists.pass_ip`` list.
"""
if cfg.get('botdetection.ip_lists.pass_searxng_org', default=True):
for net in SEARXNG_ORG:
net = ip_network(net, strict=False)
if real_ip.version == net.version and real_ip in net:
return True, f"IP matches {net.compressed} in SEARXNG_ORG list."
return ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.pass_ip', cfg)
def block_ip(real_ip: IPv4Address | IPv6Address, cfg: config.Config) -> Tuple[bool, str]:
"""Checks if the IP on the subnet is in one of the members of the
``botdetection.ip_lists.block_ip`` list.
"""
block, msg = ip_is_subnet_of_member_in_list(real_ip, 'botdetection.ip_lists.block_ip', cfg)
if block:
msg += " To remove IP from list, please contact the maintainer of the service."
return block, msg
def ip_is_subnet_of_member_in_list(
real_ip: IPv4Address | IPv6Address, list_name: str, cfg: config.Config
) -> Tuple[bool, str]:
for net in cfg.get(list_name, default=[]):
try:
net = ip_network(net, strict=False)
except ValueError:
logger.error("invalid IP %s in %s", net, list_name)
continue
if real_ip.version == net.version and real_ip in net:
return True, f"IP matches {net.compressed} in {list_name}."
return False, f"IP is not a member of an item in the f{list_name} list"

View File

@ -1,154 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Method ``link_token``
---------------------
The ``link_token`` method evaluates a request as :py:obj:`suspicious
<is_suspicious>` if the URL ``/client<token>.css`` is not requested by the
client. By adding a random component (the token) in the URL, a bot can not send
a ping by request a static URL.
.. note::
This method requires a redis DB and needs a HTTP X-Forwarded-For_ header.
To get in use of this method a flask URL route needs to be added:
.. code:: python
@app.route('/client<token>.css', methods=['GET', 'POST'])
def client_token(token=None):
link_token.ping(request, token)
return Response('', mimetype='text/css')
And in the HTML template from flask a stylesheet link is needed (the value of
``link_token`` comes from :py:obj:`get_token`):
.. code:: html
<link rel="stylesheet"
href="{{ url_for('client_token', token=link_token) }}"
type="text/css" />
.. _X-Forwarded-For:
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
"""
from __future__ import annotations
from ipaddress import (
IPv4Network,
IPv6Network,
ip_address,
)
import string
import random
import flask
from searx import logger
from searx import redisdb
from searx.redislib import secret_hash
from ._helpers import (
get_network,
get_real_ip,
)
TOKEN_LIVE_TIME = 600
"""Livetime (sec) of limiter's CSS token."""
PING_LIVE_TIME = 3600
"""Livetime (sec) of the ping-key from a client (request)"""
PING_KEY = 'SearXNG_limiter.ping'
"""Prefix of all ping-keys generated by :py:obj:`get_ping_key`"""
TOKEN_KEY = 'SearXNG_limiter.token'
"""Key for which the current token is stored in the DB"""
logger = logger.getChild('botdetection.link_token')
def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, renew: bool = False):
"""Checks whether a valid ping is exists for this (client) network, if not
this request is rated as *suspicious*. If a valid ping exists and argument
``renew`` is ``True`` the expire time of this ping is reset to
:py:obj:`PING_LIVE_TIME`.
"""
redis_client = redisdb.client()
if not redis_client:
return False
ping_key = get_ping_key(network, request)
if not redis_client.get(ping_key):
logger.info("missing ping (IP: %s) / request: %s", network.compressed, ping_key)
return True
if renew:
redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
logger.debug("found ping for (client) network %s -> %s", network.compressed, ping_key)
return False
def ping(request: flask.Request, token: str):
"""This function is called by a request to URL ``/client<token>.css``. If
``token`` is valid a :py:obj:`PING_KEY` for the client is stored in the DB.
The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
"""
from . import redis_client, cfg # pylint: disable=import-outside-toplevel, cyclic-import
if not redis_client:
return
if not token_is_valid(token):
return
real_ip = ip_address(get_real_ip(request))
network = get_network(real_ip, cfg)
ping_key = get_ping_key(network, request)
logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key)
redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
def get_ping_key(network: IPv4Network | IPv6Network, request: flask.Request) -> str:
"""Generates a hashed key that fits (more or less) to a *WEB-browser
session* in a network."""
return (
PING_KEY
+ "["
+ secret_hash(
network.compressed + request.headers.get('Accept-Language', '') + request.headers.get('User-Agent', '')
)
+ "]"
)
def token_is_valid(token) -> bool:
valid = token == get_token()
logger.debug("token is valid --> %s", valid)
return valid
def get_token() -> str:
"""Returns current token. If there is no currently active token a new token
is generated randomly and stored in the redis DB.
- :py:obj:`TOKEN_LIVE_TIME`
- :py:obj:`TOKEN_KEY`
"""
redis_client = redisdb.client()
if not redis_client:
# This function is also called when limiter is inactive / no redis DB
# (see render function in webapp.py)
return '12345678'
token = redis_client.get(TOKEN_KEY)
if token:
token = token.decode('UTF-8')
else:
token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(16))
redis_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME)
return token

View File

@ -22,20 +22,26 @@ The engine has the following (additional) settings:
- :py:obj:`search_mode`
- :py:obj:`search_type`
The :py:obj:`base_url` has to be set in the engine named `yacy` and is used by
all yacy engines.
.. code:: yaml
- name: yacy
engine: yacy
categories: general
search_type: text
base_url: https://yacy.searchlab.eu
shortcut: ya
base_url:
- https://yacy.searchlab.eu
- https://search.lomig.me
- https://yacy.ecosys.eu
- https://search.webproject.link
- name: yacy images
engine: yacy
categories: images
search_type: image
base_url: https://yacy.searchlab.eu
shortcut: yai
disabled: true
@ -45,6 +51,9 @@ Implementations
"""
# pylint: disable=fixme
from __future__ import annotations
import random
from json import loads
from urllib.parse import urlencode
from dateutil import parser
@ -87,15 +96,10 @@ search_type = 'text'
``video`` are not yet implemented (Pull-Requests are welcome).
"""
# search-url
base_url = 'https://yacy.searchlab.eu'
search_url = (
'/yacysearch.json?{query}'
'&startRecord={offset}'
'&maximumRecords={limit}'
'&contentdom={search_type}'
'&resource={resource}'
)
base_url: list | str = 'https://yacy.searchlab.eu'
"""The value is an URL or a list of URLs. In the latter case instance will be
selected randomly.
"""
def init(_):
@ -108,23 +112,34 @@ def init(_):
raise ValueError('search_type "%s" is not one of %s' % (search_type, valid_types))
def _base_url() -> str:
from searx.engines import engines # pylint: disable=import-outside-toplevel
url = engines['yacy'].base_url # type: ignore
if isinstance(url, list):
url = random.choice(url)
return url
def request(query, params):
offset = (params['pageno'] - 1) * number_of_results
params['url'] = base_url + search_url.format(
query=urlencode({'query': query}),
offset=offset,
limit=number_of_results,
search_type=search_type,
resource=search_mode,
)
if http_digest_auth_user and http_digest_auth_pass:
params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)
args = {
'query': query,
'startRecord': offset,
'maximumRecords': number_of_results,
'contentdom': search_type,
'resource': search_mode,
}
# add language tag if specified
if params['language'] != 'all':
params['url'] += '&lr=lang_' + params['language'].split('-')[0]
args['lr'] = 'lang_' + params['language'].split('-')[0]
params["url"] = f"{_base_url()}/yacysearch.json?{urlencode(args)}"
if http_digest_auth_user and http_digest_auth_pass:
params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)
return params

View File

@ -96,33 +96,37 @@ from __future__ import annotations
import sys
from pathlib import Path
from ipaddress import ip_address
import flask
import werkzeug
from searx import (
logger,
redisdb,
)
from searx import botdetection
from searx.botdetection import (
config,
http_accept,
http_accept_encoding,
http_accept_language,
http_user_agent,
ip_limit,
ip_lists,
get_network,
get_real_ip,
dump_request,
from botdetection import (
install_botdetection,
RouteFilter,
Config,
PredefinedRequestFilter,
RequestContext,
RequestInfo,
too_many_requests,
)
from searx import logger, redisdb
try:
import tomllib
pytomlpp = None
USE_TOMLLIB = True
except ImportError:
import pytomlpp
tomllib = None
USE_TOMLLIB = False
# the configuration are limiter.toml and "limiter" in settings.yml so, for
# coherency, the logger is "limiter"
logger = logger.getChild('limiter')
CFG: config.Config = None # type: ignore
CFG: Config = None # type: ignore
_INSTALLED = False
LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
@ -131,82 +135,71 @@ LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
LIMITER_CFG = Path('/etc/searxng/limiter.toml')
"""Local Limiter configuration."""
CFG_DEPRECATED = {
# "dummy.old.foo": "config 'dummy.old.foo' exists only for tests. Don't use it in your real project config."
}
API_WINDOW = 3600
"""Time (sec) before sliding window for API requests (format != html) expires."""
API_MAX = 4
"""Maximum requests from one IP in the :py:obj:`API_WINDOW`"""
def get_cfg() -> config.Config:
def toml_load(file_name):
if USE_TOMLLIB:
# Python >= 3.11
try:
with open(file_name, "rb") as f:
return tomllib.load(f)
except tomllib.TOMLDecodeError as exc:
msg = str(exc).replace('\t', '').replace('\n', ' ')
logger.error("%s: %s", file_name, msg)
raise
# fallback to pytomlpp for Python < 3.11
try:
return pytomlpp.load(file_name)
except pytomlpp.DecodeError as exc:
msg = str(exc).replace('\t', '').replace('\n', ' ')
logger.error("%s: %s", file_name, msg)
raise
def get_config() -> Config:
global CFG # pylint: disable=global-statement
if CFG is None:
CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, LIMITER_CFG, CFG_DEPRECATED)
if LIMITER_CFG.is_file():
data = toml_load(LIMITER_CFG)
else:
data = toml_load(LIMITER_CFG_SCHEMA)
CFG = Config(real_ip=data["real_ip"], botdetection=data["botdetection"])
return CFG
def filter_request(request: flask.Request) -> werkzeug.Response | None:
# pylint: disable=too-many-return-statements
cfg = get_cfg()
real_ip = ip_address(get_real_ip(request))
network = get_network(real_ip, cfg)
if request.path == '/healthz':
return None
# link-local
if network.is_link_local:
return None
# block- & pass- lists
#
# 1. The IP of the request is first checked against the pass-list; if the IP
# matches an entry in the list, the request is not blocked.
# 2. If no matching entry is found in the pass-list, then a check is made against
# the block list; if the IP matches an entry in the list, the request is
# blocked.
# 3. If the IP is not in either list, the request is not blocked.
match, msg = ip_lists.pass_ip(real_ip, cfg)
if match:
logger.warning("PASS %s: matched PASSLIST - %s", network.compressed, msg)
return None
match, msg = ip_lists.block_ip(real_ip, cfg)
if match:
logger.error("BLOCK %s: matched BLOCKLIST - %s", network.compressed, msg)
return flask.make_response(('IP is on BLOCKLIST - %s' % msg, 429))
# methods applied on /
for func in [
http_user_agent,
]:
val = func.filter_request(network, request, cfg)
if val is not None:
return val
# methods applied on /search
if request.path == '/search':
for func in [
http_accept,
http_accept_encoding,
http_accept_language,
http_user_agent,
ip_limit,
]:
val = func.filter_request(network, request, cfg)
if val is not None:
return val
logger.debug(f"OK {network}: %s", dump_request(flask.request))
def api_rate_filter_request(
context: RequestContext,
request_info: RequestInfo,
request: flask.Request,
) -> werkzeug.Response | None:
if request.args.get("format", "html") != "html":
c = context.redislib.incr_sliding_window("ip_limit.API_WINDOW:" + request_info.network.compressed, API_WINDOW)
if c > API_MAX:
return too_many_requests(request_info, "too many request in API_WINDOW")
return None
def pre_request():
"""See :py:obj:`flask.Flask.before_request`"""
return filter_request(flask.request)
route_filter = RouteFilter(
{
"/healthz": [],
"/search": [
PredefinedRequestFilter.HTTP_ACCEPT,
PredefinedRequestFilter.HTTP_ACCEPT_ENCODING,
PredefinedRequestFilter.HTTP_ACCEPT_LANGUAGE,
PredefinedRequestFilter.HTTP_USER_AGENT,
api_rate_filter_request,
PredefinedRequestFilter.IP_LIMIT,
],
"*": [
PredefinedRequestFilter.HTTP_USER_AGENT,
],
}
)
def is_installed():
@ -221,13 +214,10 @@ def initialize(app: flask.Flask, settings):
# even if the limiter is not activated, the botdetection must be activated
# (e.g. the self_info plugin uses the botdetection to get client IP)
cfg = get_cfg()
redis_client = redisdb.client()
botdetection.init(cfg, redis_client)
if not (settings['server']['limiter'] or settings['server']['public_instance']):
return
redis_client = redisdb.client()
if not redis_client:
logger.error(
"The limiter requires Redis, please consult the documentation: "
@ -237,10 +227,12 @@ def initialize(app: flask.Flask, settings):
sys.exit(1)
return
# install botdetection
_INSTALLED = True
config = get_config()
if settings['server']['public_instance']:
# overwrite limiter.toml setting
cfg.set('botdetection.ip_limit.link_token', True)
config.botdetection.ip_limit.link_token = True
app.before_request(pre_request)
install_botdetection(app, redis_client, config, route_filter)

View File

@ -0,0 +1,88 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Calculate mathematical expressions using ack#eval
"""
import ast
import operator
from flask_babel import gettext
from searx import settings
name = "Basic Calculator"
description = gettext("Calculate mathematical expressions via the search bar")
default_on = False
preference_section = 'general'
plugin_id = 'calculator'
operators = {
ast.Add: operator.add,
ast.Sub: operator.sub,
ast.Mult: operator.mul,
ast.Div: operator.truediv,
ast.Pow: operator.pow,
ast.BitXor: operator.xor,
ast.USub: operator.neg,
}
def _eval_expr(expr):
"""
>>> _eval_expr('2^6')
4
>>> _eval_expr('2**6')
64
>>> _eval_expr('1 + 2*3**(4^5) / (6 + -7)')
-5.0
"""
return _eval(ast.parse(expr, mode='eval').body)
def _eval(node):
if isinstance(node, ast.Constant) and isinstance(node.value, int):
return node.value
if isinstance(node, ast.BinOp):
return operators[type(node.op)](_eval(node.left), _eval(node.right))
if isinstance(node, ast.UnaryOp):
return operators[type(node.op)](_eval(node.operand))
raise TypeError(node)
def post_search(_request, search):
# don't run on public instances due to possible attack surfaces
if settings['server']['public_instance']:
return True
# only show the result of the expression on the first page
if search.search_query.pageno > 1:
return True
query = search.search_query.query
# in order to avoid DoS attacks with long expressions, ignore long expressions
if len(query) > 100:
return True
# replace commonly used math operators with their proper Python operator
query = query.replace("x", "*").replace(":", "/")
# only numbers and math operators are accepted
if any(str.isalpha(c) for c in query):
return True
# in python, powers are calculated via **
query_py_formatted = query.replace("^", "**")
try:
result = str(_eval_expr(query_py_formatted))
if result != query:
search.result_container.answers['calculate'] = {'answer': f"{query} = {result}"}
except (TypeError, SyntaxError, ArithmeticError):
pass
return True
def is_allowed():
return not settings['server']['public_instance']

View File

@ -4,7 +4,6 @@
import re
from flask_babel import gettext
from searx.botdetection._helpers import get_real_ip
name = gettext('Self Information')
description = gettext('Displays your IP if the query is "ip" and your user agent if the query contains "user agent".')
@ -17,11 +16,18 @@ query_examples = ''
p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
def get_client_ip(request):
botdetection_context = getattr(request, "botdetection_context", None)
if botdetection_context:
return request.botdetection_request_info.real_ip
return request.remote_addr
def post_search(request, search):
if search.search_query.pageno > 1:
return True
if search.search_query.query == 'ip':
ip = get_real_ip(request)
ip = get_client_ip(request)
search.result_container.answers['ip'] = {'answer': ip}
elif p.match(search.search_query.query):
ua = request.user_agent

View File

@ -1,58 +1,245 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Calculate mathematical expressions using ack#eval
"""A plugin for converting measured values from one unit to another unit (a
unit converter).
The plugin looks up the symbols (given in the query term) in a list of
converters, each converter is one item in the list (compare
:py:obj:`ADDITIONAL_UNITS`). If the symbols are ambiguous, the matching units
of measurement are evaluated. The weighting in the evaluation results from the
sorting of the :py:obj:`list of unit converters<symbol_to_si>`.
Enable in ``settings.yml``:
.. code:: yaml
enabled_plugins:
..
- 'Unit converter plugin'
"""
from flask_babel import gettext
import re
import babel.numbers
from flask_babel import gettext, get_locale
from searx import data
from searx.data import WIKIDATA_UNITS
name = "Unit converter plugin"
description = gettext("Convert between units")
default_on = True
plugin_id = "unit_converter"
preference_section = "general"
CONVERT_KEYWORDS = ["in", "to", "as"]
def _convert(from_value, source_si_factor, target_si_factor):
return from_value * source_si_factor / target_si_factor
# inspired from https://stackoverflow.com/a/42475086
RE_MEASURE = r'''
(?P<sign>[-+]?) # +/- or nothing for positive
(\s*) # separator: white space or nothing
(?P<number>[\d\.,]*) # number: 1,000.00 (en) or 1.000,00 (de)
(?P<E>[eE][-+]?\d+)? # scientific notation: e(+/-)2 (*10^2)
(\s*) # separator: white space or nothing
(?P<unit>\S+) # unit of measure
'''
def _parse_text_and_convert(search, splitted_query):
if len(splitted_query) != 2 or splitted_query[0].strip() == "" or splitted_query[1].strip() == "":
ADDITIONAL_UNITS = [
{
"si_name": "Q11579",
"symbol": "°C",
"to_si": lambda val: val + 273.15,
"from_si": lambda val: val - 273.15,
},
{
"si_name": "Q11579",
"symbol": "°F",
"to_si": lambda val: (val + 459.67) * 5 / 9,
"from_si": lambda val: (val * 9 / 5) - 459.67,
},
]
"""Additional items to convert from a measure unit to a SI unit (vice versa).
.. code:: python
{
"si_name": "Q11579", # Wikidata item ID of the SI unit (Kelvin)
"symbol": "°C", # symbol of the measure unit
"to_si": lambda val: val + 273.15, # convert measure value (val) to SI unit
"from_si": lambda val: val - 273.15, # convert SI value (val) measure unit
},
{
"si_name": "Q11573",
"symbol": "mi",
"to_si": 1609.344, # convert measure value (val) to SI unit
"from_si": 1 / 1609.344 # convert SI value (val) measure unit
},
The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
or a callable_ (val in / converted value returned).
.. _callable: https://docs.python.org/3/glossary.html#term-callable
"""
ALIAS_SYMBOLS = {
'°C': ('C',),
'°F': ('F',),
'mi': ('L',),
}
"""Alias symbols for known unit of measure symbols / by example::
'°C': ('C', ...), # list of alias symbols for °C (Q69362731)
'°F': ('F', ...), # list of alias symbols for °F (Q99490479)
'mi': ('L',), # list of alias symbols for mi (Q253276)
"""
SYMBOL_TO_SI = []
def symbol_to_si():
"""Generates a list of tuples, each tuple is a measure unit and the fields
in the tuple are:
0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)
1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')
2. Factor to get SI value from measure unit (e.g. 1mi is equal to SI 1m
multiplied by 1609.344)
3. Factor to get measure value from from SI value (e.g. SI 100m is equal to
100mi divided by 1609.344)
The returned list is sorted, the first items are created from
``WIKIDATA_UNITS``, the second group of items is build from
:py:obj:`ADDITIONAL_UNITS` and items created from :py:obj:`ALIAS_SYMBOLS`.
If you search this list for a symbol, then a match with a symbol from
Wikidata has the highest weighting (first hit in the list), followed by the
symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.
"""
global SYMBOL_TO_SI # pylint: disable=global-statement
if SYMBOL_TO_SI:
return SYMBOL_TO_SI
# filter out units which can't be normalized to a SI unit and filter out
# units without a symbol / arcsecond does not have a symbol
# https://www.wikidata.org/wiki/Q829073
for item in data.WIKIDATA_UNITS.values():
if item['to_si_factor'] and item['symbol']:
SYMBOL_TO_SI.append(
(
item['symbol'],
item['si_name'],
item['to_si_factor'], # from_si
1 / item['to_si_factor'], # to_si
item['symbol'],
)
)
for item in ADDITIONAL_UNITS:
SYMBOL_TO_SI.append(
(
item['symbol'],
item['si_name'],
item['from_si'],
item['to_si'],
item['symbol'],
)
)
alias_items = []
for item in SYMBOL_TO_SI:
for alias in ALIAS_SYMBOLS.get(item[0], ()):
alias_items.append(
(
alias,
item[1],
item[2], # from_si
item[3], # to_si
item[0], # origin unit
)
)
SYMBOL_TO_SI = SYMBOL_TO_SI + alias_items
return SYMBOL_TO_SI
def _parse_text_and_convert(search, from_query, to_query):
# pylint: disable=too-many-branches, too-many-locals
if not (from_query and to_query):
return
from_value = ""
from_unit_key = ""
# only parse digits as value that belong together
read_alpha = False
for c in splitted_query[0]:
if not read_alpha and (c in ("-", ".") or str.isdigit(c)):
from_value += c
read_alpha = True
elif c != " ":
from_unit_key += c
to_unit_key = splitted_query[1].strip()
from_unit = None
to_unit = None
for unit in WIKIDATA_UNITS.values():
if unit['symbol'] == from_unit_key:
from_unit = unit
if unit['symbol'] == to_unit_key:
to_unit = unit
if from_unit and to_unit:
break
if from_unit is None or to_unit is None or to_unit.get('si_name') != from_unit.get('si_name'):
measured = re.match(RE_MEASURE, from_query, re.VERBOSE)
if not (measured and measured.group('number'), measured.group('unit')):
return
result = _convert(float(from_value), from_unit['to_si_factor'], to_unit['to_si_factor'])
search.result_container.answers['conversion'] = {'answer': f"{result:g} {to_unit['symbol']}"}
# Symbols are not unique, if there are several hits for the from-unit, then
# the correct one must be determined by comparing it with the to-unit
# https://github.com/searxng/searxng/pull/3378#issuecomment-2080974863
# first: collecting possible units
source_list, target_list = [], []
for symbol, si_name, from_si, to_si, orig_symbol in symbol_to_si():
if symbol == measured.group('unit'):
source_list.append((si_name, to_si))
if symbol == to_query:
target_list.append((si_name, from_si, orig_symbol))
if not (source_list and target_list):
return
source_to_si = target_from_si = target_symbol = None
# second: find the right unit by comparing list of from-units with list of to-units
for source in source_list:
for target in target_list:
if source[0] == target[0]: # compare si_name
source_to_si = source[1]
target_from_si = target[1]
target_symbol = target[2]
if not (source_to_si and target_from_si):
return
_locale = get_locale() or 'en_US'
value = measured.group('sign') + measured.group('number') + (measured.group('E') or '')
value = babel.numbers.parse_decimal(value, locale=_locale)
# convert value to SI unit
if isinstance(source_to_si, (float, int)):
value = float(value) * source_to_si
else:
value = source_to_si(float(value))
# convert value from SI unit to target unit
if isinstance(target_from_si, (float, int)):
value = float(value) * target_from_si
else:
value = target_from_si(float(value))
if measured.group('E'):
# when incomming notation is scientific, outgoing notation is scientific
result = babel.numbers.format_scientific(value, locale=_locale)
else:
result = babel.numbers.format_decimal(value, locale=_locale, format='#,##0.##########;-#')
search.result_container.answers['conversion'] = {'answer': f'{result} {target_symbol}'}
def post_search(_request, search):
@ -69,8 +256,8 @@ def post_search(_request, search):
for query_part in query_parts:
for keyword in CONVERT_KEYWORDS:
if query_part == keyword:
keyword_split = query.split(keyword, 1)
_parse_text_and_convert(search, keyword_split)
from_query, to_query = query.split(keyword, 1)
_parse_text_and_convert(search, from_query.strip(), to_query.strip())
return True
return True

View File

@ -220,6 +220,7 @@ outgoing:
# - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy
# # these plugins are disabled if nothing is configured ..
# - 'Hostname replace' # see hostname_replace configuration below
# - 'Calculator plugin'
# - 'Open Access DOI rewrite'
# - 'Tor check plugin'
# # Read the docs before activate: auto-detection of the language could be
@ -2081,7 +2082,11 @@ engines:
engine: yacy
categories: general
search_type: text
base_url: https://yacy.searchlab.eu
base_url:
- https://yacy.searchlab.eu
- https://search.lomig.me
- https://yacy.ecosys.eu
- https://search.webproject.link
shortcut: ya
disabled: true
# required if you aren't using HTTPS for your local yacy instance
@ -2094,7 +2099,6 @@ engines:
engine: yacy
categories: images
search_type: image
base_url: https://yacy.searchlab.eu
shortcut: yai
disabled: true

View File

@ -17,9 +17,9 @@
{% else %}
<link rel="stylesheet" href="{{ url_for('static', filename='css/searxng.min.css') }}" type="text/css" media="screen" />
{% endif %}
{% if get_setting('server.limiter') or get_setting('server.public_instance') %}
<link rel="stylesheet" href="{{ url_for('client_token', token=link_token) }}" type="text/css" />
{% endif %}
{%- if botdetection_html_header is defined -%}
{{- botdetection_html_header() | safe -}}
{%- endif %}
{% block styles %}{% endblock %}
<!--[if gte IE 9]>-->
<script src="{{ url_for('static', filename='js/searxng.head.min.js') }}" client_settings="{{ client_settings }}"></script>

View File

@ -38,7 +38,7 @@
{%- macro plugin_preferences(section) -%}
{%- for plugin in plugins -%}
{%- if plugin.preference_section == section -%}
{%- if plugin.preference_section == section and (plugin.is_allowed() if plugin.is_allowed else True) -%}
<fieldset>{{- '' -}}
<legend>{{ _(plugin.name) }}</legend>{{- '' -}}
<div class="value">

View File

@ -56,7 +56,6 @@ from searx import (
from searx import infopage
from searx import limiter
from searx.botdetection import link_token
from searx.data import ENGINE_DESCRIPTIONS
from searx.results import Timing
@ -383,7 +382,6 @@ def render(template_name: str, **kwargs):
kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint
kwargs['cookies'] = request.cookies
kwargs['errors'] = request.errors
kwargs['link_token'] = link_token.get_token()
# values from the preferences
kwargs['preferences'] = request.preferences
@ -613,12 +611,6 @@ def health():
return Response('OK', mimetype='text/plain')
@app.route('/client<token>.css', methods=['GET', 'POST'])
def client_token(token=None):
link_token.ping(request, token)
return Response('', mimetype='text/css')
@app.route('/search', methods=['GET', 'POST'])
def search():
"""Search query in q and return results.
@ -1281,7 +1273,7 @@ def config():
for _ in plugins:
_plugins.append({'name': _.name, 'enabled': _.default_on})
_limiter_cfg = limiter.get_cfg()
_limiter_cfg = limiter.get_config()
return jsonify(
{
@ -1304,8 +1296,8 @@ def config():
},
'limiter': {
'enabled': limiter.is_installed(),
'botdetection.ip_limit.link_token': _limiter_cfg.get('botdetection.ip_limit.link_token'),
'botdetection.ip_lists.pass_searxng_org': _limiter_cfg.get('botdetection.ip_lists.pass_searxng_org'),
'botdetection.ip_limit.link_token': _limiter_cfg.botdetection.ip_limit.link_token,
'botdetection.ip_lists.pass_searxng_org': _limiter_cfg.botdetection.ip_lists, # fix me
},
'doi_resolvers': list(settings['doi_resolvers'].keys()),
'default_doi_resolver': settings['default_doi_resolver'],

View File

@ -3,11 +3,7 @@
from mock import Mock
from searx import (
plugins,
limiter,
botdetection,
)
from searx import plugins
from tests import SearxTestCase
@ -53,8 +49,6 @@ class SelfIPTest(SearxTestCase): # pylint: disable=missing-class-docstring
plugin = plugins.load_and_initialize_plugin('searx.plugins.self_info', False, (None, {}))
store = plugins.PluginStore()
store.register(plugin)
cfg = limiter.get_cfg()
botdetection.init(cfg, None)
self.assertTrue(len(store.plugins) == 1)

View File

@ -89,10 +89,17 @@ test.robot() {
dump_return $?
}
test.rst() {
build_msg TEST "[reST markup] ${RST_FILES[*]}"
local rst2html=rst2html
if [ "3.8" == "$(python -c 'import sys; print(".".join([str(x) for x in sys.version_info[:2]]))')" ]; then
rst2html=rst2html.py
fi
for rst in "${RST_FILES[@]}"; do
pyenv.cmd rst2html.py --halt error "$rst" > /dev/null || die 42 "fix issue in $rst"
pyenv.cmd "${rst2html}" --halt error "$rst" > /dev/null || die 42 "fix issue in $rst"
done
}