1
0
mirror of https://github.com/searxng/searxng.git synced 2024-11-05 04:40:11 +01:00

Merge pull request #297 from dalf/engine-logger-enh

debug mode: more readable logging
This commit is contained in:
Alexandre Flament 2021-09-14 07:06:28 +02:00 committed by GitHub
commit 602cbc2c99
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 153 additions and 63 deletions

View File

@ -29,7 +29,7 @@ run: install
sleep 2 ; \ sleep 2 ; \
xdg-open http://127.0.0.1:8888/ ; \ xdg-open http://127.0.0.1:8888/ ; \
) & ) &
SEARX_DEBUG=1 ./manage pyenv.cmd python ./searx/webapp.py SEARX_DEBUG=1 ./manage pyenv.cmd python -m searx.webapp
PHONY += install uninstall PHONY += install uninstall
install uninstall: install uninstall:

View File

@ -17,3 +17,4 @@ linuxdoc==20210324
aiounittest==1.4.0 aiounittest==1.4.0
yamllint==1.26.3 yamllint==1.26.3
wlc==1.12 wlc==1.12
coloredlogs==15.0.1

View File

@ -2,13 +2,24 @@
# lint: pylint # lint: pylint
# pylint: disable=missing-module-docstring # pylint: disable=missing-module-docstring
import sys
import os
from os.path import dirname, abspath from os.path import dirname, abspath
import logging import logging
import searx.unixthreadname import searx.unixthreadname
import searx.settings_loader import searx.settings_loader
from searx.settings_defaults import settings_set_defaults from searx.settings_defaults import settings_set_defaults
# Debug
LOG_FORMAT_DEBUG = '%(levelname)-7s %(name)-30.30s: %(message)s'
# Production
LOG_FORMAT_PROD = '%(asctime)-15s %(levelname)s:%(name)s: %(message)s'
LOG_LEVEL_PROD = logging.WARNING
searx_dir = abspath(dirname(__file__)) searx_dir = abspath(dirname(__file__))
searx_parent_dir = abspath(dirname(dirname(__file__))) searx_parent_dir = abspath(dirname(dirname(__file__)))
settings, settings_load_message = searx.settings_loader.load_settings() settings, settings_load_message = searx.settings_loader.load_settings()
@ -16,22 +27,6 @@ settings, settings_load_message = searx.settings_loader.load_settings()
if settings is not None: if settings is not None:
settings = settings_set_defaults(settings) settings = settings_set_defaults(settings)
searx_debug = settings['general']['debug']
if searx_debug:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger('searx')
logger.info(settings_load_message)
# log max_request_timeout
max_request_timeout = settings['outgoing']['max_request_timeout']
if max_request_timeout is None:
logger.info('max_request_timeout=%s', repr(max_request_timeout))
else:
logger.info('max_request_timeout=%i second(s)', max_request_timeout)
_unset = object() _unset = object()
def get_setting(name, default=_unset): def get_setting(name, default=_unset):
@ -53,3 +48,63 @@ def get_setting(name, default=_unset):
break break
return value return value
def is_color_terminal():
if os.getenv('TERM') in ('dumb', 'unknown'):
return False
return sys.stdout.isatty()
def logging_config_debug():
try:
import coloredlogs # pylint: disable=import-outside-toplevel
except ImportError:
coloredlogs = None
log_level = os.environ.get('SEARX_DEBUG_LOG_LEVEL', 'DEBUG')
if coloredlogs and is_color_terminal():
level_styles = {
'spam': {'color': 'green', 'faint': True},
'debug': {},
'notice': {'color': 'magenta'},
'success': {'bold': True, 'color': 'green'},
'info': {'bold': True, 'color': 'cyan'},
'warning': {'color': 'yellow'},
'error': {'color': 'red'},
'critical': {'bold': True, 'color': 'red'},
}
field_styles = {
'asctime': {'color': 'green'},
'hostname': {'color': 'magenta'},
'levelname': {'color': 8},
'name': {'color': 8},
'programname': {'color': 'cyan'},
'username': {'color': 'yellow'}
}
coloredlogs.install(
level=log_level,
level_styles=level_styles,
field_styles=field_styles,
fmt=LOG_FORMAT_DEBUG
)
else:
logging.basicConfig(level=logging.getLevelName(log_level), format=LOG_FORMAT_DEBUG)
searx_debug = settings['general']['debug']
if searx_debug:
logging_config_debug()
else:
logging.basicConfig(level=LOG_LEVEL_PROD, format=LOG_FORMAT_PROD)
logging.root.setLevel(level=LOG_LEVEL_PROD)
logging.getLogger('werkzeug').setLevel(level=LOG_LEVEL_PROD)
logger = logging.getLogger('searx')
logger.info(settings_load_message)
# log max_request_timeout
max_request_timeout = settings['outgoing']['max_request_timeout']
if max_request_timeout is None:
logger.info('max_request_timeout=%s', repr(max_request_timeout))
else:
logger.info('max_request_timeout=%i second(s)', max_request_timeout)

View File

@ -173,7 +173,6 @@ def request(query, params):
params['url'] = search_url.format(**fargs) params['url'] = search_url.format(**fargs)
params['soft_max_redirects'] = soft_max_redirects params['soft_max_redirects'] = soft_max_redirects
logger.debug("query_url --> %s", params['url'])
return params return params

View File

@ -5,7 +5,8 @@ from urllib.parse import urlparse
from httpx import HTTPError, HTTPStatusError from httpx import HTTPError, HTTPStatusError
from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException, from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException,
SearxEngineAccessDeniedException) SearxEngineAccessDeniedException)
from searx import logger, searx_parent_dir from searx import searx_parent_dir
from searx.engines import engines
errors_per_engines = {} errors_per_engines = {}
@ -47,7 +48,7 @@ class ErrorContext:
def add_error_context(engine_name: str, error_context: ErrorContext) -> None: def add_error_context(engine_name: str, error_context: ErrorContext) -> None:
errors_for_engine = errors_per_engines.setdefault(engine_name, {}) errors_for_engine = errors_per_engines.setdefault(engine_name, {})
errors_for_engine[error_context] = errors_for_engine.get(error_context, 0) + 1 errors_for_engine[error_context] = errors_for_engine.get(error_context, 0) + 1
logger.debug('%s: %s', engine_name, str(error_context)) engines[engine_name].logger.warning('%s', str(error_context))
def get_trace(traces): def get_trace(traces):

View File

@ -194,7 +194,7 @@ def new_client(
# pylint: disable=too-many-arguments # pylint: disable=too-many-arguments
enable_http, verify, enable_http2, enable_http, verify, enable_http2,
max_connections, max_keepalive_connections, keepalive_expiry, max_connections, max_keepalive_connections, keepalive_expiry,
proxies, local_address, retries, max_redirects ): proxies, local_address, retries, max_redirects, hook_log_response ):
limit = httpx.Limits( limit = httpx.Limits(
max_connections=max_connections, max_connections=max_connections,
max_keepalive_connections=max_keepalive_connections, max_keepalive_connections=max_keepalive_connections,
@ -221,7 +221,17 @@ def new_client(
mounts['http://'] = AsyncHTTPTransportNoHttp() mounts['http://'] = AsyncHTTPTransportNoHttp()
transport = get_transport(verify, enable_http2, local_address, None, limit, retries) transport = get_transport(verify, enable_http2, local_address, None, limit, retries)
return httpx.AsyncClient(transport=transport, mounts=mounts, max_redirects=max_redirects)
event_hooks = None
if hook_log_response:
event_hooks = {'response': [ hook_log_response ]}
return httpx.AsyncClient(
transport=transport,
mounts=mounts,
max_redirects=max_redirects,
event_hooks=event_hooks,
)
def get_loop(): def get_loop():
@ -231,7 +241,7 @@ def get_loop():
def init(): def init():
# log # log
for logger_name in ('hpack.hpack', 'hpack.table'): for logger_name in ('hpack.hpack', 'hpack.table', 'httpx._client'):
logging.getLogger(logger_name).setLevel(logging.WARNING) logging.getLogger(logger_name).setLevel(logging.WARNING)
# loop # loop

View File

@ -10,9 +10,11 @@ from itertools import cycle
import httpx import httpx
from searx import logger, searx_debug
from .client import new_client, get_loop from .client import new_client, get_loop
logger = logger.getChild('network')
DEFAULT_NAME = '__DEFAULT__' DEFAULT_NAME = '__DEFAULT__'
NETWORKS = {} NETWORKS = {}
# requests compatibility when reading proxy settings from settings.yml # requests compatibility when reading proxy settings from settings.yml
@ -41,7 +43,7 @@ class Network:
'enable_http', 'verify', 'enable_http2', 'enable_http', 'verify', 'enable_http2',
'max_connections', 'max_keepalive_connections', 'keepalive_expiry', 'max_connections', 'max_keepalive_connections', 'keepalive_expiry',
'local_addresses', 'proxies', 'max_redirects', 'retries', 'retry_on_http_error', 'local_addresses', 'proxies', 'max_redirects', 'retries', 'retry_on_http_error',
'_local_addresses_cycle', '_proxies_cycle', '_clients' '_local_addresses_cycle', '_proxies_cycle', '_clients', '_logger'
) )
def __init__( def __init__(
@ -57,7 +59,8 @@ class Network:
local_addresses=None, local_addresses=None,
retries=0, retries=0,
retry_on_http_error=None, retry_on_http_error=None,
max_redirects=30 ): max_redirects=30,
logger_name=None):
self.enable_http = enable_http self.enable_http = enable_http
self.verify = verify self.verify = verify
@ -73,6 +76,7 @@ class Network:
self._local_addresses_cycle = self.get_ipaddress_cycle() self._local_addresses_cycle = self.get_ipaddress_cycle()
self._proxies_cycle = self.get_proxy_cycles() self._proxies_cycle = self.get_proxy_cycles()
self._clients = {} self._clients = {}
self._logger = logger.getChild(logger_name) if logger_name else logger
self.check_parameters() self.check_parameters()
def check_parameters(self): def check_parameters(self):
@ -130,12 +134,25 @@ class Network:
# pylint: disable=stop-iteration-return # pylint: disable=stop-iteration-return
yield tuple((pattern, next(proxy_url_cycle)) for pattern, proxy_url_cycle in proxy_settings.items()) yield tuple((pattern, next(proxy_url_cycle)) for pattern, proxy_url_cycle in proxy_settings.items())
async def log_response(self, response: httpx.Response):
request = response.request
status = f"{response.status_code} {response.reason_phrase}"
response_line = f"{response.http_version} {status}"
if hasattr(response, "_elapsed"):
elapsed_time = f"{response.elapsed.total_seconds()} sec"
else:
elapsed_time = "stream"
self._logger.debug(
f'HTTP Request: {request.method} {request.url} "{response_line}" ({elapsed_time})'
)
def get_client(self, verify=None, max_redirects=None): def get_client(self, verify=None, max_redirects=None):
verify = self.verify if verify is None else verify verify = self.verify if verify is None else verify
max_redirects = self.max_redirects if max_redirects is None else max_redirects max_redirects = self.max_redirects if max_redirects is None else max_redirects
local_address = next(self._local_addresses_cycle) local_address = next(self._local_addresses_cycle)
proxies = next(self._proxies_cycle) # is a tuple so it can be part of the key proxies = next(self._proxies_cycle) # is a tuple so it can be part of the key
key = (verify, max_redirects, local_address, proxies) key = (verify, max_redirects, local_address, proxies)
hook_log_response = self.log_response if searx_debug else None
if key not in self._clients or self._clients[key].is_closed: if key not in self._clients or self._clients[key].is_closed:
self._clients[key] = new_client( self._clients[key] = new_client(
self.enable_http, self.enable_http,
@ -147,7 +164,8 @@ class Network:
dict(proxies), dict(proxies),
local_address, local_address,
0, 0,
max_redirects max_redirects,
hook_log_response
) )
return self._clients[key] return self._clients[key]
@ -243,11 +261,13 @@ def initialize(settings_engines=None, settings_outgoing=None):
'retry_on_http_error': None, 'retry_on_http_error': None,
} }
def new_network(params): def new_network(params, logger_name=None):
nonlocal default_params nonlocal default_params
result = {} result = {}
result.update(default_params) result.update(default_params)
result.update(params) result.update(params)
if logger_name:
result['logger_name'] = logger_name
return Network(**result) return Network(**result)
def iter_networks(): def iter_networks():
@ -263,13 +283,13 @@ def initialize(settings_engines=None, settings_outgoing=None):
if NETWORKS: if NETWORKS:
done() done()
NETWORKS.clear() NETWORKS.clear()
NETWORKS[DEFAULT_NAME] = new_network({}) NETWORKS[DEFAULT_NAME] = new_network({}, logger_name='default')
NETWORKS['ipv4'] = new_network({'local_addresses': '0.0.0.0'}) NETWORKS['ipv4'] = new_network({'local_addresses': '0.0.0.0'}, logger_name='ipv4')
NETWORKS['ipv6'] = new_network({'local_addresses': '::'}) NETWORKS['ipv6'] = new_network({'local_addresses': '::'}, logger_name='ipv6')
# define networks from outgoing.networks # define networks from outgoing.networks
for network_name, network in settings_outgoing['networks'].items(): for network_name, network in settings_outgoing['networks'].items():
NETWORKS[network_name] = new_network(network) NETWORKS[network_name] = new_network(network, logger_name=network_name)
# define networks from engines.[i].network (except references) # define networks from engines.[i].network (except references)
for engine_name, engine, network in iter_networks(): for engine_name, engine, network in iter_networks():
@ -280,9 +300,9 @@ def initialize(settings_engines=None, settings_outgoing=None):
network[attribute_name] = getattr(engine, attribute_name) network[attribute_name] = getattr(engine, attribute_name)
else: else:
network[attribute_name] = attribute_value network[attribute_name] = attribute_value
NETWORKS[engine_name] = new_network(network) NETWORKS[engine_name] = new_network(network, logger_name=engine_name)
elif isinstance(network, dict): elif isinstance(network, dict):
NETWORKS[engine_name] = new_network(network) NETWORKS[engine_name] = new_network(network, logger_name=engine_name)
# define networks from engines.[i].network (references) # define networks from engines.[i].network (references)
for engine_name, engine, network in iter_networks(): for engine_name, engine, network in iter_networks():
@ -295,7 +315,7 @@ def initialize(settings_engines=None, settings_outgoing=None):
if 'image_proxy' not in NETWORKS: if 'image_proxy' not in NETWORKS:
image_proxy_params = default_params.copy() image_proxy_params = default_params.copy()
image_proxy_params['enable_http2'] = False image_proxy_params['enable_http2'] = False
NETWORKS['image_proxy'] = new_network(image_proxy_params) NETWORKS['image_proxy'] = new_network(image_proxy_params, logger_name='image_proxy')
@atexit.register @atexit.register

View File

@ -147,7 +147,7 @@ class Search:
if th.is_alive(): if th.is_alive():
th._timeout = True th._timeout = True
self.result_container.add_unresponsive_engine(th._engine_name, 'timeout') self.result_container.add_unresponsive_engine(th._engine_name, 'timeout')
logger.warning('engine timeout: {0}'.format(th._engine_name)) PROCESSORS[th._engine_name].logger.error('engine timeout')
def search_standard(self): def search_standard(self):
""" """

View File

@ -65,6 +65,6 @@ def initialize(engine_list):
processor = get_processor(engine, engine_name) processor = get_processor(engine, engine_name)
initialize_processor(processor) initialize_processor(processor)
if processor is None: if processor is None:
logger.error('Error get processor for engine %s', engine_name) engine.logger.error('Error get processor for engine %s', engine_name)
else: else:
PROCESSORS[engine_name] = processor PROCESSORS[engine_name] = processor

View File

@ -9,8 +9,8 @@ import threading
from abc import abstractmethod, ABC from abc import abstractmethod, ABC
from timeit import default_timer from timeit import default_timer
from searx import logger from searx import settings, logger
from searx.engines import settings from searx.engines import engines
from searx.network import get_time_for_thread, get_network from searx.network import get_time_for_thread, get_network
from searx.metrics import histogram_observe, counter_inc, count_exception, count_error from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException
@ -43,7 +43,7 @@ class SuspendedStatus:
self.continuous_errors * settings['search']['ban_time_on_fail']) self.continuous_errors * settings['search']['ban_time_on_fail'])
self.suspend_end_time = default_timer() + suspended_time self.suspend_end_time = default_timer() + suspended_time
self.suspend_reason = suspend_reason self.suspend_reason = suspend_reason
logger.debug('Suspend engine for %i seconds', suspended_time) logger.debug('Suspend for %i seconds', suspended_time)
def resume(self): def resume(self):
with self.lock: with self.lock:
@ -56,11 +56,12 @@ class SuspendedStatus:
class EngineProcessor(ABC): class EngineProcessor(ABC):
"""Base classes used for all types of reqest processores.""" """Base classes used for all types of reqest processores."""
__slots__ = 'engine', 'engine_name', 'lock', 'suspended_status' __slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger'
def __init__(self, engine, engine_name): def __init__(self, engine, engine_name):
self.engine = engine self.engine = engine
self.engine_name = engine_name self.engine_name = engine_name
self.logger = engines[engine_name].logger
key = get_network(self.engine_name) key = get_network(self.engine_name)
key = id(key) if key else self.engine_name key = id(key) if key else self.engine_name
self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus()) self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())
@ -69,11 +70,11 @@ class EngineProcessor(ABC):
try: try:
self.engine.init(get_engine_from_settings(self.engine_name)) self.engine.init(get_engine_from_settings(self.engine_name))
except SearxEngineResponseException as exc: except SearxEngineResponseException as exc:
logger.warn('%s engine: Fail to initialize // %s', self.engine_name, exc) self.logger.warn('Fail to initialize // %s', exc)
except Exception: # pylint: disable=broad-except except Exception: # pylint: disable=broad-except
logger.exception('%s engine: Fail to initialize', self.engine_name) self.logger.exception('Fail to initialize')
else: else:
logger.debug('%s engine: Initialized', self.engine_name) self.logger.debug('Initialized')
@property @property
def has_initialize_function(self): def has_initialize_function(self):

View File

@ -5,10 +5,8 @@
""" """
from searx import logger
from .abstract import EngineProcessor from .abstract import EngineProcessor
logger = logger.getChild('searx.search.processor.offline')
class OfflineProcessor(EngineProcessor): class OfflineProcessor(EngineProcessor):
"""Processor class used by ``offline`` engines""" """Processor class used by ``offline`` engines"""
@ -24,7 +22,7 @@ class OfflineProcessor(EngineProcessor):
self.extend_container(result_container, start_time, search_results) self.extend_container(result_container, start_time, search_results)
except ValueError as e: except ValueError as e:
# do not record the error # do not record the error
logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e)) self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e))
except Exception as e: # pylint: disable=broad-except except Exception as e: # pylint: disable=broad-except
self.handle_exception(result_container, e) self.handle_exception(result_container, e)
logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) self.logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))

View File

@ -10,7 +10,6 @@ import asyncio
import httpx import httpx
import searx.network import searx.network
from searx import logger
from searx.utils import gen_useragent from searx.utils import gen_useragent
from searx.exceptions import ( from searx.exceptions import (
SearxEngineAccessDeniedException, SearxEngineAccessDeniedException,
@ -20,7 +19,6 @@ from searx.exceptions import (
from searx.metrics.error_recorder import count_error from searx.metrics.error_recorder import count_error
from .abstract import EngineProcessor from .abstract import EngineProcessor
logger = logger.getChild('searx.search.processor.online')
def default_request_params(): def default_request_params():
"""Default request parameters for ``online`` engines.""" """Default request parameters for ``online`` engines."""
@ -146,31 +144,37 @@ class OnlineProcessor(EngineProcessor):
except (httpx.TimeoutException, asyncio.TimeoutError) as e: except (httpx.TimeoutException, asyncio.TimeoutError) as e:
# requests timeout (connect or read) # requests timeout (connect or read)
self.handle_exception(result_container, e, suspend=True) self.handle_exception(result_container, e, suspend=True)
logger.error("engine {0} : HTTP requests timeout" self.logger.error(
"(search duration : {1} s, timeout: {2} s) : {3}" "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}"
.format(self.engine_name, default_timer() - start_time, .format(
timeout_limit, default_timer() - start_time,
e.__class__.__name__)) timeout_limit,
e.__class__.__name__
)
)
except (httpx.HTTPError, httpx.StreamError) as e: except (httpx.HTTPError, httpx.StreamError) as e:
# other requests exception # other requests exception
self.handle_exception(result_container, e, suspend=True) self.handle_exception(result_container, e, suspend=True)
logger.exception("engine {0} : requests exception" self.logger.exception(
"(search duration : {1} s, timeout: {2} s) : {3}" "requests exception (search duration : {0} s, timeout: {1} s) : {2}"
.format(self.engine_name, default_timer() - start_time, .format(
timeout_limit, default_timer() - start_time,
e)) timeout_limit,
e
)
)
except SearxEngineCaptchaException as e: except SearxEngineCaptchaException as e:
self.handle_exception(result_container, e, suspend=True) self.handle_exception(result_container, e, suspend=True)
logger.exception('engine {0} : CAPTCHA'.format(self.engine_name)) self.logger.exception('CAPTCHA')
except SearxEngineTooManyRequestsException as e: except SearxEngineTooManyRequestsException as e:
self.handle_exception(result_container, e, suspend=True) self.handle_exception(result_container, e, suspend=True)
logger.exception('engine {0} : Too many requests'.format(self.engine_name)) self.logger.exception('Too many requests')
except SearxEngineAccessDeniedException as e: except SearxEngineAccessDeniedException as e:
self.handle_exception(result_container, e, suspend=True) self.handle_exception(result_container, e, suspend=True)
logger.exception('engine {0} : Searx is blocked'.format(self.engine_name)) self.logger.exception('Searx is blocked')
except Exception as e: # pylint: disable=broad-except except Exception as e: # pylint: disable=broad-except
self.handle_exception(result_container, e) self.handle_exception(result_container, e)
logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) self.logger.exception('exception : {0}'.format(e))
def get_default_tests(self): def get_default_tests(self):
tests = {} tests = {}

View File

@ -3,6 +3,7 @@ import os
import aiounittest import aiounittest
os.environ['SEARX_DEBUG'] = '1' os.environ['SEARX_DEBUG'] = '1'
os.environ['SEARX_DEBUG_LOG_LEVEL'] = 'WARNING'
os.environ['SEARX_DISABLE_ETC_SETTINGS'] = '1' os.environ['SEARX_DISABLE_ETC_SETTINGS'] = '1'
os.environ.pop('SEARX_SETTINGS_PATH', None) os.environ.pop('SEARX_SETTINGS_PATH', None)