From 99435381a84072b110c32004b2fb778af9b96f77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Sat, 1 Feb 2020 11:01:17 +0100 Subject: [PATCH] [enh] introduce private engines This PR adds a new setting to engines named `tokens`. It expects a list of tokens which lets searx validate if the request should be accepted or not. --- searx/__init__.py | 1 + searx/engines/__init__.py | 9 ++- searx/engines/dummy-offline.py | 12 ++++ searx/engines/genius.py | 1 + searx/preferences.py | 44 +++++++++++++ searx/query.py | 4 +- searx/search.py | 5 +- searx/templates/oscar/preferences.html | 6 ++ searx/webapp.py | 17 +++-- tests/unit/test_search.py | 90 ++++++++++++++++++++------ 10 files changed, 161 insertions(+), 28 deletions(-) create mode 100644 searx/engines/dummy-offline.py diff --git a/searx/__init__.py b/searx/__init__.py index d32fe0066..2f3ebfcfe 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -38,6 +38,7 @@ def check_settings_yml(file_name): else: return None + # find location of settings.yml if 'SEARX_SETTINGS_PATH' in environ: # if possible set path to settings using the diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 2393f52b6..9ccef8b54 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -54,7 +54,8 @@ engine_default_args = {'paging': False, 'suspend_end_time': 0, 'continuous_errors': 0, 'time_range_support': False, - 'offline': False} + 'offline': False, + 'tokens': []} def load_engine(engine_data): @@ -160,7 +161,7 @@ def to_percentage(stats, maxvalue): return stats -def get_engines_stats(): +def get_engines_stats(preferences): # TODO refactor pageloads = [] engine_times = [] @@ -171,8 +172,12 @@ def get_engines_stats(): max_pageload = max_engine_times = max_results = max_score = max_errors = max_score_per_result = 0 # noqa for engine in engines.values(): + if not preferences.validate_token(engine): + continue + if engine.stats['search_count'] == 0: continue + results_num = \ engine.stats['result_count'] / float(engine.stats['search_count']) diff --git a/searx/engines/dummy-offline.py b/searx/engines/dummy-offline.py new file mode 100644 index 000000000..13a9ecc01 --- /dev/null +++ b/searx/engines/dummy-offline.py @@ -0,0 +1,12 @@ +""" + Dummy Offline + + @results one result + @stable yes +""" + + +def search(query, request_params): + return [{ + 'result': 'this is what you get', + }] diff --git a/searx/engines/genius.py b/searx/engines/genius.py index b265e9d76..aa5afad9b 100644 --- a/searx/engines/genius.py +++ b/searx/engines/genius.py @@ -72,6 +72,7 @@ def parse_album(hit): result.update({'content': 'Released: {}'.format(year)}) return result + parse = {'lyric': parse_lyric, 'song': parse_lyric, 'artist': parse_artist, 'album': parse_album} diff --git a/searx/preferences.py b/searx/preferences.py index 30a4252b0..6befdd6e1 100644 --- a/searx/preferences.py +++ b/searx/preferences.py @@ -104,6 +104,31 @@ class MultipleChoiceSetting(EnumStringSetting): resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE) +class SetSetting(Setting): + def _post_init(self): + if not hasattr(self, 'values'): + self.values = set() + + def get_value(self): + return ','.join(self.values) + + def parse(self, data): + if data == '': + self.values = set() + return + + elements = data.split(',') + for element in elements: + self.values.add(element) + + def parse_form(self, data): + elements = data.split(',') + self.values = set(elements) + + def save(self, name, resp): + resp.set_cookie(name, ','.join(self.values), max_age=COOKIE_MAX_AGE) + + class SearchLanguageSetting(EnumStringSetting): """Available choices may change, so user's value may not be in choices anymore""" @@ -272,6 +297,7 @@ class Preferences(object): self.engines = EnginesSetting('engines', choices=engines) self.plugins = PluginsSetting('plugins', choices=plugins) + self.tokens = SetSetting('tokens') self.unknown_params = {} def get_as_url_params(self): @@ -288,6 +314,8 @@ class Preferences(object): settings_kv['disabled_plugins'] = ','.join(self.plugins.disabled) settings_kv['enabled_plugins'] = ','.join(self.plugins.enabled) + settings_kv['tokens'] = ','.join(self.tokens.values) + return urlsafe_b64encode(compress(urlencode(settings_kv).encode('utf-8'))).decode('utf-8') def parse_encoded_data(self, input_data): @@ -307,6 +335,8 @@ class Preferences(object): elif user_setting_name == 'disabled_plugins': self.plugins.parse_cookie((input_data.get('disabled_plugins', ''), input_data.get('enabled_plugins', ''))) + elif user_setting_name == 'tokens': + self.tokens.parse(user_setting) elif not any(user_setting_name.startswith(x) for x in [ 'enabled_', 'disabled_', @@ -328,6 +358,8 @@ class Preferences(object): enabled_categories.append(user_setting_name[len('category_'):]) elif user_setting_name.startswith('plugin_'): disabled_plugins.append(user_setting_name) + elif user_setting_name == 'tokens': + self.tokens.parse_form(user_setting) else: self.unknown_params[user_setting_name] = user_setting self.key_value_settings['categories'].parse_form(enabled_categories) @@ -346,6 +378,18 @@ class Preferences(object): user_setting.save(user_setting_name, resp) self.engines.save(resp) self.plugins.save(resp) + self.tokens.save('tokens', resp) for k, v in self.unknown_params.items(): resp.set_cookie(k, v, max_age=COOKIE_MAX_AGE) return resp + + def validate_token(self, engine): + valid = True + if hasattr(engine, 'tokens') and engine.tokens: + valid = False + for token in self.tokens.values: + if token in engine.tokens: + valid = True + break + + return valid diff --git a/searx/query.py b/searx/query.py index c4002bd31..79afa0245 100644 --- a/searx/query.py +++ b/searx/query.py @@ -177,7 +177,8 @@ class RawTextQuery(object): class SearchQuery(object): """container for all the search parameters (query, language, etc...)""" - def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range, timeout_limit=None): + def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range, + timeout_limit=None, preferences=None): self.query = query.encode('utf-8') self.engines = engines self.categories = categories @@ -186,6 +187,7 @@ class SearchQuery(object): self.pageno = pageno self.time_range = None if time_range in ('', 'None', None) else time_range self.timeout_limit = timeout_limit + self.preferences = preferences def __str__(self): return str(self.query) + ";" + str(self.engines) diff --git a/searx/search.py b/searx/search.py index 5c268cc5d..2dcc4c8f7 100644 --- a/searx/search.py +++ b/searx/search.py @@ -407,7 +407,7 @@ def get_search_query_from_webapp(preferences, form): return (SearchQuery(query, query_engines, query_categories, query_lang, query_safesearch, query_pageno, - query_time_range, query_timeout), + query_time_range, query_timeout, preferences), raw_text_query) @@ -459,6 +459,9 @@ class Search(object): engine = engines[selected_engine['name']] + if not search_query.preferences.validate_token(engine): + continue + # skip suspended engines if engine.suspend_end_time >= time(): logger.debug('Engine currently suspended: %s', selected_engine['name']) diff --git a/searx/templates/oscar/preferences.html b/searx/templates/oscar/preferences.html index 1a484dd4b..b03929df3 100644 --- a/searx/templates/oscar/preferences.html +++ b/searx/templates/oscar/preferences.html @@ -131,6 +131,12 @@ {% endfor %} {{ preferences_item_footer(info, label, rtl) }} + + {% set label = _('Engine tokens') %} + {% set info = _('Access tokens for private engines') %} + {{ preferences_item_header(info, label, rtl) }} + + {{ preferences_item_footer(info, label, rtl) }} diff --git a/searx/webapp.py b/searx/webapp.py index 5ed9f1277..fd34a9ef4 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -731,8 +731,13 @@ def preferences(): # stats for preferences page stats = {} + engines_by_category = {} for c in categories: + engines_by_category[c] = [] for e in categories[c]: + if not request.preferences.validate_token(e): + continue + stats[e.name] = {'time': None, 'warn_timeout': False, 'warn_time': False} @@ -740,9 +745,11 @@ def preferences(): stats[e.name]['warn_timeout'] = True stats[e.name]['supports_selected_language'] = _is_selected_language_supported(e, request.preferences) + engines_by_category[c].append(e) + # get first element [0], the engine time, # and then the second element [1] : the time (the first one is the label) - for engine_stat in get_engines_stats()[0][1]: + for engine_stat in get_engines_stats(request.preferences)[0][1]: stats[engine_stat.get('name')]['time'] = round(engine_stat.get('avg'), 3) if engine_stat.get('avg') > settings['outgoing']['request_timeout']: stats[engine_stat.get('name')]['warn_time'] = True @@ -752,7 +759,7 @@ def preferences(): locales=settings['locales'], current_locale=get_locale(), image_proxy=image_proxy, - engines_by_category=categories, + engines_by_category=engines_by_category, stats=stats, answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers], disabled_engines=disabled_engines, @@ -828,7 +835,7 @@ def image_proxy(): @app.route('/stats', methods=['GET']) def stats(): """Render engine statistics page.""" - stats = get_engines_stats() + stats = get_engines_stats(request.preferences) return render( 'stats.html', stats=stats, @@ -891,7 +898,7 @@ def clear_cookies(): @app.route('/config') def config(): return jsonify({'categories': list(categories.keys()), - 'engines': [{'name': engine_name, + 'engines': [{'name': name, 'categories': engine.categories, 'shortcut': engine.shortcut, 'enabled': not engine.disabled, @@ -904,7 +911,7 @@ def config(): 'safesearch': engine.safesearch, 'time_range_support': engine.time_range_support, 'timeout': engine.timeout} - for engine_name, engine in engines.items()], + for name, engine in engines.items() if request.preferences.validate_token(engine)], 'plugins': [{'name': plugin.name, 'enabled': plugin.default_on} for plugin in plugins], diff --git a/tests/unit/test_search.py b/tests/unit/test_search.py index a39786d1a..18c221954 100644 --- a/tests/unit/test_search.py +++ b/tests/unit/test_search.py @@ -1,60 +1,112 @@ # -*- coding: utf-8 -*- from searx.testing import SearxTestCase +from searx.preferences import Preferences +from searx.engines import engines -import searx.preferences import searx.search -import searx.engines + + +SAFESEARCH = 0 +PAGENO = 1 +PUBLIC_ENGINE_NAME = 'general dummy' +PRIVATE_ENGINE_NAME = 'general private offline' +TEST_ENGINES = [ + { + 'name': PUBLIC_ENGINE_NAME, + 'engine': 'dummy', + 'categories': 'general', + 'shortcut': 'gd', + 'timeout': 3.0, + 'tokens': [], + }, + { + 'name': PRIVATE_ENGINE_NAME, + 'engine': 'dummy-offline', + 'categories': 'general', + 'shortcut': 'do', + 'timeout': 3.0, + 'offline': True, + 'tokens': ['my-token'], + }, +] class SearchTestCase(SearxTestCase): @classmethod def setUpClass(cls): - searx.engines.initialize_engines([{ - 'name': 'general dummy', - 'engine': 'dummy', - 'categories': 'general', - 'shortcut': 'gd', - 'timeout': 3.0 - }]) + searx.engines.initialize_engines(TEST_ENGINES) def test_timeout_simple(self): searx.search.max_request_timeout = None - search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}], - ['general'], 'en-US', 0, 1, None, None) + search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}], + ['general'], 'en-US', SAFESEARCH, PAGENO, None, None, + preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) search.search() self.assertEquals(search.actual_timeout, 3.0) def test_timeout_query_above_default_nomax(self): searx.search.max_request_timeout = None - search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}], - ['general'], 'en-US', 0, 1, None, 5.0) + search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}], + ['general'], 'en-US', SAFESEARCH, PAGENO, None, 5.0, + preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) search.search() self.assertEquals(search.actual_timeout, 3.0) def test_timeout_query_below_default_nomax(self): searx.search.max_request_timeout = None - search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}], - ['general'], 'en-US', 0, 1, None, 1.0) + search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}], + ['general'], 'en-US', SAFESEARCH, PAGENO, None, 1.0, + preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) search.search() self.assertEquals(search.actual_timeout, 1.0) def test_timeout_query_below_max(self): searx.search.max_request_timeout = 10.0 - search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}], - ['general'], 'en-US', 0, 1, None, 5.0) + search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}], + ['general'], 'en-US', SAFESEARCH, PAGENO, None, 5.0, + preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) search.search() self.assertEquals(search.actual_timeout, 5.0) def test_timeout_query_above_max(self): searx.search.max_request_timeout = 10.0 - search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}], - ['general'], 'en-US', 0, 1, None, 15.0) + search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}], + ['general'], 'en-US', SAFESEARCH, PAGENO, None, 15.0, + preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) search.search() self.assertEquals(search.actual_timeout, 10.0) + + def test_query_private_engine_without_token(self): + search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PRIVATE_ENGINE_NAME}], + ['general'], 'en-US', SAFESEARCH, PAGENO, None, 2.0, + preferences=Preferences(['oscar'], ['general'], engines, [])) + search = searx.search.Search(search_query) + results = search.search() + self.assertEquals(results.results_length(), 0) + + def test_query_private_engine_with_incorrect_token(self): + preferences_with_tokens = Preferences(['oscar'], ['general'], engines, []) + preferences_with_tokens.parse_dict({'tokens': 'bad-token'}) + search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PRIVATE_ENGINE_NAME}], + ['general'], 'en-US', SAFESEARCH, PAGENO, None, 2.0, + preferences=preferences_with_tokens) + search = searx.search.Search(search_query) + results = search.search() + self.assertEquals(results.results_length(), 0) + + def test_query_private_engine_with_correct_token(self): + preferences_with_tokens = Preferences(['oscar'], ['general'], engines, []) + preferences_with_tokens.parse_dict({'tokens': 'my-token'}) + search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': PRIVATE_ENGINE_NAME}], + ['general'], 'en-US', SAFESEARCH, PAGENO, None, 2.0, + preferences=preferences_with_tokens) + search = searx.search.Search(search_query) + results = search.search() + self.assertEquals(results.results_length(), 1)