From b1557b544368b416c158c13f12946859abbe00e0 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Mon, 26 Apr 2021 11:12:02 +0200 Subject: [PATCH] [mod] processors: show identical error messages on /search and /stats --- searx/search/processors/abstract.py | 26 ++++++---- searx/search/processors/offline.py | 2 +- searx/search/processors/online.py | 12 ++--- searx/webapp.py | 76 +++++++++++++++++------------ 4 files changed, 67 insertions(+), 49 deletions(-) diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index 854f6df6a..2a36222d4 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -59,22 +59,28 @@ class EngineProcessor(ABC): key = id(key) if key else self.engine_name self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus()) - def handle_exception(self, result_container, reason, exception, suspend=False, display_exception=True): + def handle_exception(self, result_container, exception_or_message, suspend=False): # update result_container - error_message = str(exception) if display_exception and exception else None - result_container.add_unresponsive_engine(self.engine_name, reason, error_message) + if isinstance(exception_or_message, BaseException): + exception_class = exception_or_message.__class__ + module_name = getattr(exception_class, '__module__', 'builtins') + module_name = '' if module_name == 'builtins' else module_name + '.' + error_message = module_name + exception_class.__qualname__ + else: + error_message = exception_or_message + result_container.add_unresponsive_engine(self.engine_name, error_message) # metrics counter_inc('engine', self.engine_name, 'search', 'count', 'error') - if exception: - count_exception(self.engine_name, exception) + if isinstance(exception_or_message, BaseException): + count_exception(self.engine_name, exception_or_message) else: - count_error(self.engine_name, reason) + count_error(self.engine_name, exception_or_message) # suspend the engine ? if suspend: suspended_time = None - if isinstance(exception, SearxEngineAccessDeniedException): - suspended_time = exception.suspended_time - self.suspended_status.suspend(suspended_time, reason) # pylint: disable=no-member + if isinstance(exception_or_message, SearxEngineAccessDeniedException): + suspended_time = exception_or_message.suspended_time + self.suspended_status.suspend(suspended_time, error_message) # pylint: disable=no-member def _extend_container_basic(self, result_container, start_time, search_results): # update result_container @@ -91,7 +97,7 @@ class EngineProcessor(ABC): def extend_container(self, result_container, start_time, search_results): if getattr(threading.current_thread(), '_timeout', False): # the main thread is not waiting anymore - self.handle_exception(result_container, 'Timeout', None) + self.handle_exception(result_container, 'timeout', None) else: # check if the engine accepted the request if search_results is not None: diff --git a/searx/search/processors/offline.py b/searx/search/processors/offline.py index 5186b346a..ad03fed4b 100644 --- a/searx/search/processors/offline.py +++ b/searx/search/processors/offline.py @@ -22,5 +22,5 @@ class OfflineProcessor(EngineProcessor): # do not record the error logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e)) except Exception as e: - self.handle_exception(result_container, 'unexpected crash', e) + self.handle_exception(result_container, e) logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index c39937023..57422c007 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -130,7 +130,7 @@ class OnlineProcessor(EngineProcessor): self.extend_container(result_container, start_time, search_results) except (httpx.TimeoutException, asyncio.TimeoutError) as e: # requests timeout (connect or read) - self.handle_exception(result_container, 'HTTP timeout', e, suspend=True, display_exception=False) + self.handle_exception(result_container, e, suspend=True) logger.error("engine {0} : HTTP requests timeout" "(search duration : {1} s, timeout: {2} s) : {3}" .format(self.engine_name, time() - start_time, @@ -138,23 +138,23 @@ class OnlineProcessor(EngineProcessor): e.__class__.__name__)) except (httpx.HTTPError, httpx.StreamError) as e: # other requests exception - self.handle_exception(result_container, 'HTTP error', e, suspend=True, display_exception=False) + self.handle_exception(result_container, e, suspend=True) logger.exception("engine {0} : requests exception" "(search duration : {1} s, timeout: {2} s) : {3}" .format(self.engine_name, time() - start_time, timeout_limit, e)) except SearxEngineCaptchaException as e: - self.handle_exception(result_container, 'CAPTCHA required', e, suspend=True, display_exception=False) + self.handle_exception(result_container, e, suspend=True) logger.exception('engine {0} : CAPTCHA'.format(self.engine_name)) except SearxEngineTooManyRequestsException as e: - self.handle_exception(result_container, 'too many requests', e, suspend=True, display_exception=False) + self.handle_exception(result_container, e, suspend=True) logger.exception('engine {0} : Too many requests'.format(self.engine_name)) except SearxEngineAccessDeniedException as e: - self.handle_exception(result_container, 'blocked', e, suspend=True, display_exception=False) + self.handle_exception(result_container, e, suspend=True) logger.exception('engine {0} : Searx is blocked'.format(self.engine_name)) except Exception as e: - self.handle_exception(result_container, 'unexpected crash', e, display_exception=False) + self.handle_exception(result_container, e) logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) def get_default_tests(self): diff --git a/searx/webapp.py b/searx/webapp.py index 70d2d662b..b8bc60ec5 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -172,28 +172,34 @@ _category_names = (gettext('files'), gettext('science')) # -exception_classname_to_label = { - "searx.exceptions.SearxEngineCaptchaException": gettext("CAPTCHA"), - "searx.exceptions.SearxEngineTooManyRequestsException": gettext("too many requests"), - "searx.exceptions.SearxEngineAccessDeniedException": gettext("access denied"), - "searx.exceptions.SearxEngineAPIException": gettext("server API error"), - "httpx.TimeoutException": gettext("HTTP timeout"), - "httpx.ConnectTimeout": gettext("HTTP timeout"), - "httpx.ReadTimeout": gettext("HTTP timeout"), - "httpx.WriteTimeout": gettext("HTTP timeout"), - "httpx.HTTPStatusError": gettext("HTTP error"), - "httpx.ConnectError": gettext("HTTP connection error"), - "httpx.RemoteProtocolError": gettext("HTTP protocol error"), - "httpx.LocalProtocolError": gettext("HTTP protocol error"), - "httpx.ProtocolError": gettext("HTTP protocol error"), - "httpx.ReadError": gettext("network error"), - "httpx.WriteError": gettext("network error"), - "httpx.ProxyError": gettext("proxy error"), - "searx.exceptions.SearxEngineXPathException": gettext("parsing error"), - "KeyError": gettext("parsing error"), - "json.decoder.JSONDecodeError": gettext("parsing error"), - "lxml.etree.ParserError": gettext("parsing error"), - None: gettext("unexpected crash"), +timeout_text = gettext('timeout') +parsing_error_text = gettext('parsing error') +http_protocol_error_text = gettext('HTTP protocol error') +network_error_text = gettext('network error') +exception_classname_to_text = { + None: gettext('unexpected crash'), + 'timeout': timeout_text, + 'asyncio.TimeoutError': timeout_text, + 'httpx.TimeoutException': timeout_text, + 'httpx.ConnectTimeout': timeout_text, + 'httpx.ReadTimeout': timeout_text, + 'httpx.WriteTimeout': timeout_text, + 'httpx.HTTPStatusError': gettext('HTTP error'), + 'httpx.ConnectError': gettext("HTTP connection error"), + 'httpx.RemoteProtocolError': http_protocol_error_text, + 'httpx.LocalProtocolError': http_protocol_error_text, + 'httpx.ProtocolError': http_protocol_error_text, + 'httpx.ReadError': network_error_text, + 'httpx.WriteError': network_error_text, + 'httpx.ProxyError': gettext("proxy error"), + 'searx.exceptions.SearxEngineCaptchaException': gettext("CAPTCHA"), + 'searx.exceptions.SearxEngineTooManyRequestsException': gettext("too many requests"), + 'searx.exceptions.SearxEngineAccessDeniedException': gettext("access denied"), + 'searx.exceptions.SearxEngineAPIException': gettext("server API error"), + 'searx.exceptions.SearxEngineXPathException': parsing_error_text, + 'KeyError': parsing_error_text, + 'json.decoder.JSONDecodeError': parsing_error_text, + 'lxml.etree.ParserError': parsing_error_text, } _flask_babel_get_translations = flask_babel.get_translations @@ -786,15 +792,21 @@ def search(): def __get_translated_errors(unresponsive_engines): - translated_errors = set() - for unresponsive_engine in unresponsive_engines: - error_msg = gettext(unresponsive_engine[1]) + translated_errors = [] + # make a copy unresponsive_engines to avoid "RuntimeError: Set changed size during iteration" + # it happens when an engine modifies the ResultContainer after the search_multiple_requests method + # has stopped waiting + for unresponsive_engine in list(unresponsive_engines): + error_user_text = exception_classname_to_text.get(unresponsive_engine[1]) + if not error_user_text: + error_user_text = exception_classname_to_text[None] + error_msg = gettext(error_user_text) if unresponsive_engine[2]: error_msg = "{} {}".format(error_msg, unresponsive_engine[2]) if unresponsive_engine[3]: error_msg = gettext('Suspended') + ': ' + error_msg - translated_errors.add((unresponsive_engine[0], error_msg)) - return translated_errors + translated_errors.append((unresponsive_engine[0], error_msg)) + return sorted(translated_errors, key=lambda e: e[0]) @app.route('/about', methods=['GET']) @@ -944,14 +956,14 @@ def preferences(): # the first element has the highest percentage rate. reliabilities_errors = [] for error in errors: - error_user_message = None + error_user_text = None if error.get('secondary') or 'exception_classname' not in error: continue - error_user_message = exception_classname_to_label.get(error.get('exception_classname')) + error_user_text = exception_classname_to_text.get(error.get('exception_classname')) if not error: - error_user_message = exception_classname_to_label[None] - if error_user_message not in reliabilities_errors: - reliabilities_errors.append(error_user_message) + error_user_text = exception_classname_to_text[None] + if error_user_text not in reliabilities_errors: + reliabilities_errors.append(error_user_text) reliabilities[e.name]['errors'] = reliabilities_errors # supports