From 0b45afd4d7f23cbbc33376b861103f54c5c9f856 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Thu, 25 Mar 2021 09:37:37 +0100 Subject: [PATCH] [fix] checker: various bug fixes * initialize engine_data (youtube engine) * don't crash if an engine doesn't set result['url'] --- searx/search/__init__.py | 2 -- searx/search/checker/impl.py | 6 ++++-- searx/search/models.py | 6 ++---- searx/search/processors/abstract.py | 1 + 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/searx/search/__init__.py b/searx/search/__init__.py index 27c6c3a88..f777e8595 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -111,8 +111,6 @@ class Search: if request_params is None: continue - request_params['engine_data'] = self.search_query.engine_data.get(engineref.name, {}) - with threading.RLock(): processor.engine.stats['sent_search_count'] += 1 diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py index 25887b0f4..ad45440ea 100644 --- a/searx/search/checker/impl.py +++ b/searx/search/checker/impl.py @@ -174,7 +174,7 @@ class ResultContainerTests: @property def result_urls(self): results = self.result_container.get_ordered_results() - return [result['url'] for result in results] + return [result['url'] for result in results if 'url' in result] def _record_error(self, message: str, *args) -> None: sq = _search_query_to_dict(self.search_query) @@ -197,6 +197,8 @@ class ResultContainerTests: self._record_error('HTML in title', repr(result.get('title', ''))) if not _check_no_html(result.get('content', '')): self._record_error('HTML in content', repr(result.get('content', ''))) + if result.get('url') is None: + self._record_error('url is None') self._add_language(result.get('title', '')) self._add_language(result.get('content', '')) @@ -310,7 +312,7 @@ class CheckerTests: self.result_container_tests_list = result_container_tests_list def unique_results(self): - """Check the results of each ResultContain is unique""" + """Check the results 
of each ResultContainer is unique""" urls_list = [rct.result_urls for rct in self.result_container_tests_list] if len(urls_list[0]) > 0: # results on the first page diff --git a/searx/search/models.py b/searx/search/models.py index 21c0fe590..7233fac42 100644 --- a/searx/search/models.py +++ b/searx/search/models.py @@ -36,7 +36,7 @@ class SearchQuery: time_range: typing.Optional[str]=None, timeout_limit: typing.Optional[float]=None, external_bang: typing.Optional[str]=None, - engine_data: typing.Optional[dict]=None): + engine_data: typing.Optional[typing.Dict[str, str]]=None): self.query = query self.engineref_list = engineref_list self.lang = lang @@ -45,9 +45,7 @@ class SearchQuery: self.time_range = time_range self.timeout_limit = timeout_limit self.external_bang = external_bang - self.engine_data = engine_data - if engine_data is None: - self.engine_data = {} + self.engine_data = engine_data or {} @property def categories(self): diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index 3a853d49e..26dab069f 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -27,6 +27,7 @@ class EngineProcessor(ABC): params['pageno'] = search_query.pageno params['safesearch'] = search_query.safesearch params['time_range'] = search_query.time_range + params['engine_data'] = search_query.engine_data.get(self.engine_name, {}) if hasattr(self.engine, 'language') and self.engine.language: params['language'] = self.engine.language