mirror of https://github.com/searxng/searxng.git synced 2024-11-17 18:00:12 +01:00

[enh] rewrite and enhance metrics

Alexandre Flament 2021-04-14 17:23:15 +02:00
parent aae7830d14
commit 7acd7ffc02
13 changed files with 427 additions and 210 deletions

--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py

@@ -21,7 +21,6 @@ import threading
 from os.path import realpath, dirname
 from babel.localedata import locale_identifiers
 from urllib.parse import urlparse
-from flask_babel import gettext
 from operator import itemgetter
 from searx import settings
 from searx import logger
@@ -136,22 +135,6 @@ def load_engine(engine_data):
         setattr(engine, 'fetch_supported_languages',
                 lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)))
 
-    engine.stats = {
-        'sent_search_count': 0,  # sent search
-        'search_count': 0,  # succesful search
-        'result_count': 0,
-        'engine_time': 0,
-        'engine_time_count': 0,
-        'score_count': 0,
-        'errors': 0
-    }
-
-    engine_type = getattr(engine, 'engine_type', 'online')
-    if engine_type != 'offline':
-        engine.stats['page_load_time'] = 0
-        engine.stats['page_load_count'] = 0
-
     # tor related settings
     if settings['outgoing'].get('using_tor_proxy'):
         # use onion url if using tor.
@@ -175,103 +158,6 @@ def load_engine(engine_data):
     return engine
 
-
-def to_percentage(stats, maxvalue):
-    for engine_stat in stats:
-        if maxvalue:
-            engine_stat['percentage'] = int(engine_stat['avg'] / maxvalue * 100)
-        else:
-            engine_stat['percentage'] = 0
-    return stats
-
-
-def get_engines_stats(preferences):
-    # TODO refactor
-    pageloads = []
-    engine_times = []
-    results = []
-    scores = []
-    errors = []
-    scores_per_result = []
-
-    max_pageload = max_engine_times = max_results = max_score = max_errors = max_score_per_result = 0  # noqa
-    for engine in engines.values():
-        if not preferences.validate_token(engine):
-            continue
-        if engine.stats['search_count'] == 0:
-            continue
-
-        results_num = \
-            engine.stats['result_count'] / float(engine.stats['search_count'])
-
-        if engine.stats['engine_time_count'] != 0:
-            this_engine_time = engine.stats['engine_time'] / float(engine.stats['engine_time_count'])  # noqa
-        else:
-            this_engine_time = 0
-
-        if results_num:
-            score = engine.stats['score_count'] / float(engine.stats['search_count'])  # noqa
-            score_per_result = score / results_num
-        else:
-            score = score_per_result = 0.0
-
-        if engine.engine_type != 'offline':
-            load_times = 0
-            if engine.stats['page_load_count'] != 0:
-                load_times = engine.stats['page_load_time'] / float(engine.stats['page_load_count'])  # noqa
-            max_pageload = max(load_times, max_pageload)
-            pageloads.append({'avg': load_times, 'name': engine.name})
-
-        max_engine_times = max(this_engine_time, max_engine_times)
-        max_results = max(results_num, max_results)
-        max_score = max(score, max_score)
-        max_score_per_result = max(score_per_result, max_score_per_result)
-        max_errors = max(max_errors, engine.stats['errors'])
-
-        engine_times.append({'avg': this_engine_time, 'name': engine.name})
-        results.append({'avg': results_num, 'name': engine.name})
-        scores.append({'avg': score, 'name': engine.name})
-        errors.append({'avg': engine.stats['errors'], 'name': engine.name})
-        scores_per_result.append({
-            'avg': score_per_result,
-            'name': engine.name
-        })
-
-    pageloads = to_percentage(pageloads, max_pageload)
-    engine_times = to_percentage(engine_times, max_engine_times)
-    results = to_percentage(results, max_results)
-    scores = to_percentage(scores, max_score)
-    scores_per_result = to_percentage(scores_per_result, max_score_per_result)
-    errors = to_percentage(errors, max_errors)
-
-    return [
-        (
-            gettext('Engine time (sec)'),
-            sorted(engine_times, key=itemgetter('avg'))
-        ),
-        (
-            gettext('Page loads (sec)'),
-            sorted(pageloads, key=itemgetter('avg'))
-        ),
-        (
-            gettext('Number of results'),
-            sorted(results, key=itemgetter('avg'), reverse=True)
-        ),
-        (
-            gettext('Scores'),
-            sorted(scores, key=itemgetter('avg'), reverse=True)
-        ),
-        (
-            gettext('Scores per result'),
-            sorted(scores_per_result, key=itemgetter('avg'), reverse=True)
-        ),
-        (
-            gettext('Errors'),
-            sorted(errors, key=itemgetter('avg'), reverse=True)
-        ),
-    ]
-
-
 def load_engines(engine_list):
     global engines, engine_shortcuts
     engines.clear()

searx/metrics/__init__.py (new file)

@@ -0,0 +1,206 @@
# SPDX-License-Identifier: AGPL-3.0-or-later

import typing
import math
import contextlib
from timeit import default_timer
from operator import itemgetter

from searx.engines import engines
from .models import HistogramStorage, CounterStorage
from .error_recorder import count_error, count_exception, errors_per_engines

__all__ = ["initialize",
           "get_engines_stats", "get_engine_errors",
           "histogram", "histogram_observe", "histogram_observe_time",
           "counter", "counter_inc", "counter_add",
           "count_error", "count_exception"]

ENDPOINTS = {'search'}

histogram_storage: typing.Optional[HistogramStorage] = None
counter_storage: typing.Optional[CounterStorage] = None


@contextlib.contextmanager
def histogram_observe_time(*args):
    h = histogram_storage.get(*args)
    before = default_timer()
    yield before
    duration = default_timer() - before
    if h:
        h.observe(duration)
    else:
        raise ValueError("histogram " + repr((*args,)) + " doesn't exist")


def histogram_observe(duration, *args):
    histogram_storage.get(*args).observe(duration)


def histogram(*args, raise_on_not_found=True):
    h = histogram_storage.get(*args)
    if raise_on_not_found and h is None:
        raise ValueError("histogram " + repr((*args,)) + " doesn't exist")
    return h


def counter_inc(*args):
    counter_storage.add(1, *args)


def counter_add(value, *args):
    counter_storage.add(value, *args)


def counter(*args):
    return counter_storage.get(*args)


def initialize(engine_names=None):
    """
    Initialize metrics
    """
    global counter_storage, histogram_storage

    counter_storage = CounterStorage()
    histogram_storage = HistogramStorage()

    # max_timeout = max of all the engine.timeout
    max_timeout = 2
    for engine_name in (engine_names or engines):
        if engine_name in engines:
            max_timeout = max(max_timeout, engines[engine_name].timeout)

    # histogram configuration
    histogram_width = 0.1
    histogram_size = int(1.5 * max_timeout / histogram_width)
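    # e.g. with a maximum engine timeout of 10 s this gives 150 buckets of 0.1 s, covering 0..15 s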

    # engines
    for engine_name in (engine_names or engines):
        # search count
        counter_storage.configure('engine', engine_name, 'search', 'count', 'sent')
        counter_storage.configure('engine', engine_name, 'search', 'count', 'successful')
        # global counter of errors
        counter_storage.configure('engine', engine_name, 'search', 'count', 'error')
        # score of the engine
        counter_storage.configure('engine', engine_name, 'score')
        # result count per requests
        histogram_storage.configure(1, 100, 'engine', engine_name, 'result', 'count')
        # time doing HTTP requests
        histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'http')
        # total time
        # .time.request and ...response times may overlap .time.http time.
        histogram_storage.configure(histogram_width, histogram_size, 'engine', engine_name, 'time', 'total')


def get_engine_errors(engine_list):
    result = {}
    engine_names = list(errors_per_engines.keys())
    engine_names.sort()
    for engine_name in engine_names:
        if engine_name not in engine_list:
            continue
        error_stats = errors_per_engines[engine_name]
        sent_search_count = max(counter('engine', engine_name, 'search', 'count', 'sent'), 1)
        sorted_context_count_list = sorted(error_stats.items(), key=lambda context_count: context_count[1])
        r = []
        for context, count in sorted_context_count_list:
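            # share of sent requests that hit this error, rounded to the nearest multiple of 5 %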
            percentage = round(20 * count / sent_search_count) * 5
            r.append({
                'filename': context.filename,
                'function': context.function,
                'line_no': context.line_no,
                'code': context.code,
                'exception_classname': context.exception_classname,
                'log_message': context.log_message,
                'log_parameters': context.log_parameters,
                'secondary': context.secondary,
                'percentage': percentage,
            })
        result[engine_name] = sorted(r, reverse=True, key=lambda d: d['percentage'])
    return result


def to_percentage(stats, maxvalue):
    for engine_stat in stats:
        if maxvalue:
            engine_stat['percentage'] = int(engine_stat['avg'] / maxvalue * 100)
        else:
            engine_stat['percentage'] = 0
    return stats


def get_engines_stats(engine_list):
    global counter_storage, histogram_storage
    assert counter_storage is not None
    assert histogram_storage is not None

    list_time = []
    list_time_http = []
    list_time_total = []
    list_result_count = []
    list_error_count = []
    list_scores = []
    list_scores_per_result = []

    max_error_count = max_http_time = max_time_total = max_result_count = max_score = None  # noqa
    for engine_name in engine_list:
        error_count = counter('engine', engine_name, 'search', 'count', 'error')

        if counter('engine', engine_name, 'search', 'count', 'sent') > 0:
            list_error_count.append({'avg': error_count, 'name': engine_name})
            max_error_count = max(error_count, max_error_count or 0)

        successful_count = counter('engine', engine_name, 'search', 'count', 'successful')
        if successful_count == 0:
            continue

        result_count_sum = histogram('engine', engine_name, 'result', 'count').sum
        time_total = histogram('engine', engine_name, 'time', 'total').percentage(50)
        time_http = histogram('engine', engine_name, 'time', 'http').percentage(50)
        result_count = result_count_sum / float(successful_count)

        if result_count:
            score = counter('engine', engine_name, 'score')  # noqa
            score_per_result = score / float(result_count_sum)
        else:
            score = score_per_result = 0.0

        max_time_total = max(time_total, max_time_total or 0)
        max_http_time = max(time_http, max_http_time or 0)
        max_result_count = max(result_count, max_result_count or 0)
        max_score = max(score, max_score or 0)

        list_time.append({'total': round(time_total, 1),
                          'http': round(time_http, 1),
                          'name': engine_name,
                          'processing': round(time_total - time_http, 1)})
        list_time_total.append({'avg': time_total, 'name': engine_name})
        list_time_http.append({'avg': time_http, 'name': engine_name})
        list_result_count.append({'avg': result_count, 'name': engine_name})
        list_scores.append({'avg': score, 'name': engine_name})
        list_scores_per_result.append({'avg': score_per_result, 'name': engine_name})

    list_time = sorted(list_time, key=itemgetter('total'))
    list_time_total = sorted(to_percentage(list_time_total, max_time_total), key=itemgetter('avg'))
    list_time_http = sorted(to_percentage(list_time_http, max_http_time), key=itemgetter('avg'))
    list_result_count = sorted(to_percentage(list_result_count, max_result_count), key=itemgetter('avg'), reverse=True)
    list_scores = sorted(list_scores, key=itemgetter('avg'), reverse=True)
    list_scores_per_result = sorted(list_scores_per_result, key=itemgetter('avg'), reverse=True)
    list_error_count = sorted(to_percentage(list_error_count, max_error_count), key=itemgetter('avg'), reverse=True)

    return {
        'time': list_time,
        'max_time': math.ceil(max_time_total or 0),
        'time_total': list_time_total,
        'time_http': list_time_http,
        'result_count': list_result_count,
        'scores': list_scores,
        'scores_per_result': list_scores_per_result,
        'error_count': list_error_count,
    }
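A minimal usage sketch of the public API above (not part of the commit; assumes a searx checkout, and 'example' is an invented engine name):

    from searx import metrics

    metrics.initialize(['example'])
    metrics.counter_inc('engine', 'example', 'search', 'count', 'sent')
    with metrics.histogram_observe_time('engine', 'example', 'time', 'total'):
        pass  # timed work goes here
    metrics.histogram_observe(42, 'engine', 'example', 'result', 'count')

    print(metrics.counter('engine', 'example', 'search', 'count', 'sent'))  # 1
    print(metrics.histogram('engine', 'example', 'time', 'total').quartile_percentage_map)  # e.g. {0.0: 100}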

--- a/searx/metrology/error_recorder.py
+++ b/searx/metrics/error_recorder.py

@@ -1,6 +1,5 @@
 import typing
 import inspect
-import logging
 from json import JSONDecodeError
 from urllib.parse import urlparse
 from httpx import HTTPError, HTTPStatusError
@@ -9,8 +8,6 @@ from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathExcepti
 from searx import logger
 
-logging.basicConfig(level=logging.INFO)
-
 errors_per_engines = {}
@@ -124,7 +121,7 @@ def get_error_context(framerecords, exception_classname, log_message, log_parame
     return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters)
 
-def record_exception(engine_name: str, exc: Exception) -> None:
+def count_exception(engine_name: str, exc: Exception) -> None:
     framerecords = inspect.trace()
     try:
         exception_classname = get_exception_classname(exc)
@@ -135,7 +132,7 @@ def record_exception(engine_name: str, exc: Exception) -> None:
         del framerecords
 
-def record_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None) -> None:
+def count_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None) -> None:
     framerecords = list(reversed(inspect.stack()[1:]))
     try:
         error_context = get_error_context(framerecords, None, log_message, log_parameters or ())

searx/metrics/models.py (new file)

@@ -0,0 +1,156 @@
# SPDX-License-Identifier: AGPL-3.0-or-later

import decimal
import threading

from searx import logger

__all__ = ["Histogram", "HistogramStorage", "CounterStorage"]

logger = logger.getChild('searx.metrics')


class Histogram:

    __slots__ = '_lock', '_size', '_sum', '_quartiles', '_count', '_width'

    def __init__(self, width=10, size=200):
        self._lock = threading.Lock()
        self._width = width
        self._size = size
        self._quartiles = [0] * size
        self._count = 0
        self._sum = 0

    def observe(self, value):
        q = int(value / self._width)
        if q < 0:
            # value below zero is ignored
            q = 0
        if q >= self._size:
            # value above the maximum is replaced by the maximum
            q = self._size - 1
        with self._lock:
            self._quartiles[q] += 1
            self._count += 1
            self._sum += value

    @property
    def quartiles(self):
        return list(self._quartiles)

    @property
    def count(self):
        return self._count

    @property
    def sum(self):
        return self._sum

    @property
    def average(self):
        with self._lock:
            if self._count != 0:
                return self._sum / self._count
            else:
                return 0

    @property
    def quartile_percentage(self):
        ''' Quartile in percentage '''
        with self._lock:
            if self._count > 0:
                return [int(q * 100 / self._count) for q in self._quartiles]
            else:
                return self._quartiles

    @property
    def quartile_percentage_map(self):
        result = {}
        # use Decimal to avoid rounding errors
        x = decimal.Decimal(0)
        width = decimal.Decimal(self._width)
        width_exponent = -width.as_tuple().exponent
        with self._lock:
            if self._count > 0:
                for y in self._quartiles:
                    yp = int(y * 100 / self._count)
                    if yp != 0:
                        result[round(float(x), width_exponent)] = yp
                    x += width
        return result
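
    # note: despite its name, percentage() below is a percentile lookup -- it returns the
    # histogram value below which `percentage` % of all observations fall (None when empty)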
    def percentage(self, percentage):
        # use Decimal to avoid rounding errors
        x = decimal.Decimal(0)
        width = decimal.Decimal(self._width)
        stop_at_value = decimal.Decimal(self._count) / 100 * percentage
        sum_value = 0
        with self._lock:
            if self._count > 0:
                for y in self._quartiles:
                    sum_value += y
                    if sum_value >= stop_at_value:
                        return x
                    x += width
        return None

    def __repr__(self):
        return "Histogram<avg: " + str(self.average) + ", count: " + str(self._count) + ">"


class HistogramStorage:

    __slots__ = 'measures'

    def __init__(self):
        self.clear()

    def clear(self):
        self.measures = {}

    def configure(self, width, size, *args):
        measure = Histogram(width, size)
        self.measures[args] = measure
        return measure

    def get(self, *args):
        return self.measures.get(args, None)

    def dump(self):
        logger.debug("Histograms:")
        ks = sorted(self.measures.keys(), key='/'.join)
        for k in ks:
            logger.debug("- %-60s %s", '|'.join(k), self.measures[k])


class CounterStorage:

    __slots__ = 'counters', 'lock'

    def __init__(self):
        self.lock = threading.Lock()
        self.clear()

    def clear(self):
        with self.lock:
            self.counters = {}

    def configure(self, *args):
        with self.lock:
            self.counters[args] = 0

    def get(self, *args):
        return self.counters[args]

    def add(self, value, *args):
        with self.lock:
            self.counters[args] += value

    def dump(self):
        with self.lock:
            ks = sorted(self.counters.keys(), key='/'.join)
            logger.debug("Counters:")
            for k in ks:
                logger.debug("- %-60s %s", '|'.join(k), self.counters[k])
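A quick sketch of how these primitives behave (illustrative only, not part of the commit):

    from searx.metrics.models import Histogram, CounterStorage

    h = Histogram(width=1, size=10)     # 10 buckets of width 1, covering values 0..10
    for v in (0.5, 1.5, 1.75, 3.25):
        h.observe(v)
    print(h.count, h.sum)               # 4 7.0
    print(h.quartile_percentage)        # [25, 50, 0, 25, 0, 0, 0, 0, 0, 0]
    print(h.percentage(50))             # 1 -- the bucket boundary below which half the observations fall

    counters = CounterStorage()
    counters.configure('engine', 'example', 'score')
    counters.add(2.5, 'engine', 'example', 'score')
    print(counters.get('engine', 'example', 'score'))   # 2.5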

--- a/searx/network/__init__.py
+++ b/searx/network/__init__.py

@@ -3,7 +3,7 @@
 import asyncio
 import threading
 import concurrent.futures
-from time import time
+from timeit import default_timer
 
 import httpx
 import h2.exceptions
@@ -65,7 +65,7 @@ def get_context_network():
 def request(method, url, **kwargs):
     """same as requests/requests/api.py request(...)"""
-    time_before_request = time()
+    time_before_request = default_timer()
 
     # timeout (httpx)
     if 'timeout' in kwargs:
@@ -82,7 +82,7 @@ def request(method, url, **kwargs):
         timeout += 0.2  # overhead
         start_time = getattr(THREADLOCAL, 'start_time', time_before_request)
         if start_time:
-            timeout -= time() - start_time
+            timeout -= default_timer() - start_time
 
     # raise_for_error
     check_for_httperror = True
@@ -111,7 +111,7 @@ def request(method, url, **kwargs):
     # update total_time.
     # See get_time_for_thread() and reset_time_for_thread()
     if hasattr(THREADLOCAL, 'total_time'):
-        time_after_request = time()
+        time_after_request = default_timer()
         THREADLOCAL.total_time += time_after_request - time_before_request
 
     # raise an exception

(new file)

@@ -0,0 +1,2 @@
# compatibility with searx/searx
from searx.network import raise_for_httperror

--- a/searx/results.py
+++ b/searx/results.py

@@ -5,7 +5,7 @@ from threading import RLock
 from urllib.parse import urlparse, unquote
 from searx import logger
 from searx.engines import engines
-from searx.metrology.error_recorder import record_error
+from searx.metrics import histogram_observe, counter_add, count_error
 
 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
@@ -196,12 +196,10 @@ class ResultContainer:
         if len(error_msgs) > 0:
             for msg in error_msgs:
-                record_error(engine_name, 'some results are invalids: ' + msg)
+                count_error(engine_name, 'some results are invalids: ' + msg)
 
         if engine_name in engines:
-            with RLock():
-                engines[engine_name].stats['search_count'] += 1
-                engines[engine_name].stats['result_count'] += standard_result_count
+            histogram_observe(standard_result_count, 'engine', engine_name, 'result', 'count')
 
         if not self.paging and standard_result_count > 0 and engine_name in engines\
                 and engines[engine_name].paging:
@@ -301,9 +299,8 @@ class ResultContainer:
         for result in self._merged_results:
             score = result_score(result)
             result['score'] = score
-            with RLock():
-                for result_engine in result['engines']:
-                    engines[result_engine].stats['score_count'] += score
+            for result_engine in result['engines']:
+                counter_add(score, 'engine', result_engine, 'score')
 
         results = sorted(self._merged_results, key=itemgetter('score'), reverse=True)

--- a/searx/search/__init__.py
+++ b/searx/search/__init__.py

@@ -18,7 +18,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 import typing
 import gc
 import threading
-from time import time
+from timeit import default_timer
 from uuid import uuid4
 from _thread import start_new_thread
@@ -31,6 +31,7 @@ from searx.plugins import plugins
 from searx.search.models import EngineRef, SearchQuery
 from searx.search.processors import processors, initialize as initialize_processors
 from searx.search.checker import initialize as initialize_checker
+from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time
 
 logger = logger.getChild('search')
@@ -50,6 +51,7 @@ else:
 def initialize(settings_engines=None, enable_checker=False):
     settings_engines = settings_engines or settings['engines']
     initialize_processors(settings_engines)
+    initialize_metrics([engine['name'] for engine in settings_engines])
     if enable_checker:
         initialize_checker()
@@ -115,8 +117,7 @@ class Search:
             if request_params is None:
                 continue
 
-            with processor.lock:
-                processor.engine.stats['sent_search_count'] += 1
+            counter_inc('engine', engineref.name, 'search', 'count', 'sent')
 
             # append request to list
             requests.append((engineref.name, self.search_query.query, request_params))
@@ -161,7 +162,7 @@ class Search:
         for th in threading.enumerate():
             if th.name == search_id:
-                remaining_time = max(0.0, self.actual_timeout - (time() - self.start_time))
+                remaining_time = max(0.0, self.actual_timeout - (default_timer() - self.start_time))
                 th.join(remaining_time)
                 if th.is_alive():
                     th._timeout = True
@@ -184,12 +185,10 @@ class Search:
     # do search-request
     def search(self):
-        self.start_time = time()
+        self.start_time = default_timer()
 
         if not self.search_external_bang():
             if not self.search_answerers():
                 self.search_standard()
 
         return self.result_container

--- a/searx/search/checker/impl.py
+++ b/searx/search/checker/impl.py

@@ -16,6 +16,7 @@ from searx import network, logger
 from searx.results import ResultContainer
 from searx.search.models import SearchQuery, EngineRef
 from searx.search.processors import EngineProcessor
+from searx.metrics import counter_inc
 
 logger = logger.getChild('searx.search.checker')
@@ -384,8 +385,7 @@ class Checker:
         engineref_category = search_query.engineref_list[0].category
         params = self.processor.get_params(search_query, engineref_category)
         if params is not None:
-            with self.processor.lock:
-                self.processor.engine.stats['sent_search_count'] += 1
+            counter_inc('engine', search_query.engineref_list[0].name, 'search', 'count', 'sent')
             self.processor.search(search_query.query, params, result_container, time(), 5)
         return result_container

--- a/searx/search/processors/abstract.py
+++ b/searx/search/processors/abstract.py

@@ -2,12 +2,12 @@
 import threading
 from abc import abstractmethod, ABC
-from time import time
+from timeit import default_timer
 
 from searx import logger
 from searx.engines import settings
 from searx.network import get_time_for_thread, get_network
-from searx.metrology.error_recorder import record_exception, record_error
+from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
 from searx.exceptions import SearxEngineAccessDeniedException
@@ -27,7 +27,7 @@ class SuspendedStatus:
     @property
     def is_suspended(self):
-        return self.suspend_end_time >= time()
+        return self.suspend_end_time >= default_timer()
 
     def suspend(self, suspended_time, suspend_reason):
         with self.lock:
@@ -36,7 +36,7 @@ class SuspendedStatus:
             if suspended_time is None:
                 suspended_time = min(settings['search']['max_ban_time_on_fail'],
                                      self.continuous_errors * settings['search']['ban_time_on_fail'])
-            self.suspend_end_time = time() + suspended_time
+            self.suspend_end_time = default_timer() + suspended_time
             self.suspend_reason = suspend_reason
             logger.debug('Suspend engine for %i seconds', suspended_time)
@@ -55,7 +55,6 @@ class EngineProcessor(ABC):
     def __init__(self, engine, engine_name):
         self.engine = engine
         self.engine_name = engine_name
-        self.lock = threading.Lock()
         key = get_network(self.engine_name)
         key = id(key) if key else self.engine_name
         self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())
@@ -65,12 +64,11 @@ class EngineProcessor(ABC):
         error_message = str(exception) if display_exception and exception else None
         result_container.add_unresponsive_engine(self.engine_name, reason, error_message)
         # metrics
-        with self.lock:
-            self.engine.stats['errors'] += 1
+        counter_inc('engine', self.engine_name, 'search', 'count', 'error')
         if exception:
-            record_exception(self.engine_name, exception)
+            count_exception(self.engine_name, exception)
         else:
-            record_error(self.engine_name, reason)
+            count_error(self.engine_name, reason)
         # suspend the engine ?
         if suspend:
             suspended_time = None
@@ -81,17 +79,14 @@ class EngineProcessor(ABC):
     def _extend_container_basic(self, result_container, start_time, search_results):
         # update result_container
         result_container.extend(self.engine_name, search_results)
-        engine_time = time() - start_time
+        engine_time = default_timer() - start_time
         page_load_time = get_time_for_thread()
        result_container.add_timing(self.engine_name, engine_time, page_load_time)
         # metrics
-        with self.lock:
-            self.engine.stats['engine_time'] += engine_time
-            self.engine.stats['engine_time_count'] += 1
-            # update stats with the total HTTP time
-            if page_load_time is not None and 'page_load_time' in self.engine.stats:
-                self.engine.stats['page_load_time'] += page_load_time
-                self.engine.stats['page_load_count'] += 1
+        counter_inc('engine', self.engine_name, 'search', 'count', 'successful')
+        histogram_observe(engine_time, 'engine', self.engine_name, 'time', 'total')
+        if page_load_time is not None:
+            histogram_observe(page_load_time, 'engine', self.engine_name, 'time', 'http')
 
     def extend_container(self, result_container, start_time, search_results):
         if getattr(threading.current_thread(), '_timeout', False):

--- a/searx/search/processors/online.py
+++ b/searx/search/processors/online.py

@@ -10,7 +10,7 @@ from searx import logger
 from searx.utils import gen_useragent
 from searx.exceptions import (SearxEngineAccessDeniedException, SearxEngineCaptchaException,
                               SearxEngineTooManyRequestsException,)
-from searx.metrology.error_recorder import record_error
+from searx.metrics.error_recorder import count_error
 
 from searx.search.processors.abstract import EngineProcessor
@@ -90,9 +90,9 @@ class OnlineProcessor(EngineProcessor):
             status_code = str(response.status_code or '')
             reason = response.reason_phrase or ''
             hostname = response.url.host
-            record_error(self.engine_name,
-                         '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
-                         (status_code, reason, hostname))
+            count_error(self.engine_name,
+                        '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
+                        (status_code, reason, hostname))
 
         return response

--- a/searx/webapp.py
+++ b/searx/webapp.py

@@ -51,7 +51,7 @@ from searx import logger
 logger = logger.getChild('webapp')
 
 from datetime import datetime, timedelta
-from time import time
+from timeit import default_timer
 from html import escape
 from io import StringIO
 from urllib.parse import urlencode, urlparse
@@ -73,9 +73,7 @@ from flask.json import jsonify
 from searx import brand, static_path
 from searx import settings, searx_dir, searx_debug
 from searx.exceptions import SearxParameterException
-from searx.engines import (
-    categories, engines, engine_shortcuts, get_engines_stats
-)
+from searx.engines import categories, engines, engine_shortcuts
 from searx.webutils import (
     UnicodeWriter, highlight_content, get_resources_directory,
     get_static_files, get_result_templates, get_themes,
@@ -95,7 +93,7 @@ from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
 from searx.answerers import answerers
 from searx.network import stream as http_stream
 from searx.answerers import ask
-from searx.metrology.error_recorder import errors_per_engines
+from searx.metrics import get_engines_stats, get_engine_errors, histogram
 
 # serve pages with HTTP/1.1
 from werkzeug.serving import WSGIRequestHandler
@@ -463,7 +461,7 @@ def _get_ordered_categories():
 @app.before_request
 def pre_request():
-    request.start_time = time()
+    request.start_time = default_timer()
     request.timings = []
     request.errors = []
@@ -521,7 +519,7 @@ def add_default_headers(response):
 @app.after_request
 def post_request(response):
-    total_time = time() - request.start_time
+    total_time = default_timer() - request.start_time
     timings_all = ['total;dur=' + str(round(total_time * 1000, 3))]
     if len(request.timings) > 0:
         timings = sorted(request.timings, key=lambda v: v['total'])
@@ -852,29 +850,25 @@ def preferences():
     allowed_plugins = request.preferences.plugins.get_enabled()
 
     # stats for preferences page
-    stats = {}
+    filtered_engines = dict(filter(lambda kv: (kv[0], request.preferences.validate_token(kv[1])), engines.items()))
 
     engines_by_category = {}
     for c in categories:
-        engines_by_category[c] = []
-        for e in categories[c]:
-            if not request.preferences.validate_token(e):
-                continue
-
-            stats[e.name] = {'time': None,
-                             'warn_timeout': False,
-                             'warn_time': False}
-            if e.timeout > settings['outgoing']['request_timeout']:
-                stats[e.name]['warn_timeout'] = True
-            stats[e.name]['supports_selected_language'] = _is_selected_language_supported(e, request.preferences)
-
-            engines_by_category[c].append(e)
+        engines_by_category[c] = [e for e in categories[c] if e.name in filtered_engines]
 
     # get first element [0], the engine time,
     # and then the second element [1] : the time (the first one is the label)
-    for engine_stat in get_engines_stats(request.preferences)[0][1]:
-        stats[engine_stat.get('name')]['time'] = round(engine_stat.get('avg'), 3)
-        if engine_stat.get('avg') > settings['outgoing']['request_timeout']:
-            stats[engine_stat.get('name')]['warn_time'] = True
+    stats = {}
+    for _, e in filtered_engines.items():
+        h = histogram('engine', e.name, 'time', 'total')
+        median = round(h.percentage(50), 1) if h.count > 0 else None
+        stats[e.name] = {
+            'time': median if median else None,
+            'warn_time': median is not None and median > settings['outgoing']['request_timeout'],
+            'warn_timeout': e.timeout > settings['outgoing']['request_timeout'],
+            'supports_selected_language': _is_selected_language_supported(e, request.preferences)
+        }
     # end of stats
locked_preferences = list() locked_preferences = list()
@@ -976,38 +970,23 @@ def image_proxy():
 @app.route('/stats', methods=['GET'])
 def stats():
     """Render engine statistics page."""
-    stats = get_engines_stats(request.preferences)
+    filtered_engines = dict(filter(lambda kv: (kv[0], request.preferences.validate_token(kv[1])), engines.items()))
+    engine_stats = get_engines_stats(filtered_engines)
     return render(
         'stats.html',
-        stats=stats,
+        stats=[(gettext('Engine time (sec)'), engine_stats['time_total']),
+               (gettext('Page loads (sec)'), engine_stats['time_http']),
+               (gettext('Number of results'), engine_stats['result_count']),
+               (gettext('Scores'), engine_stats['scores']),
+               (gettext('Scores per result'), engine_stats['scores_per_result']),
+               (gettext('Errors'), engine_stats['error_count'])]
     )
 
 
 @app.route('/stats/errors', methods=['GET'])
 def stats_errors():
-    result = {}
-    engine_names = list(errors_per_engines.keys())
-    engine_names.sort()
-    for engine_name in engine_names:
-        error_stats = errors_per_engines[engine_name]
-        sent_search_count = max(engines[engine_name].stats['sent_search_count'], 1)
-        sorted_context_count_list = sorted(error_stats.items(), key=lambda context_count: context_count[1])
-        r = []
-        percentage_sum = 0
-        for context, count in sorted_context_count_list:
-            percentage = round(20 * count / sent_search_count) * 5
-            percentage_sum += percentage
-            r.append({
-                'filename': context.filename,
-                'function': context.function,
-                'line_no': context.line_no,
-                'code': context.code,
-                'exception_classname': context.exception_classname,
-                'log_message': context.log_message,
-                'log_parameters': context.log_parameters,
-                'percentage': percentage,
-            })
-        result[engine_name] = sorted(r, reverse=True, key=lambda d: d['percentage'])
+    filtered_engines = dict(filter(lambda kv: (kv[0], request.preferences.validate_token(kv[1])), engines.items()))
+    result = get_engine_errors(filtered_engines)
     return jsonify(result)
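For reference, a sketch of the JSON shape /stats/errors now returns, based on the fields built in get_engine_errors (engine name and values are invented for illustration):

    {
      "example engine": [
        {
          "filename": "searx/engines/example.py",
          "function": "response",
          "line_no": 42,
          "code": "raise SearxEngineAPIException(...)",
          "exception_classname": "searx.exceptions.SearxEngineAPIException",
          "log_message": null,
          "log_parameters": null,
          "secondary": false,
          "percentage": 15
        }
      ]
    }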