diff --git a/Makefile b/Makefile index 89e32ec90..334b3d35c 100644 --- a/Makefile +++ b/Makefile @@ -54,8 +54,8 @@ search.checker.%: install $(Q)./manage pyenv.cmd searx-checker -v "$(subst _, ,$(patsubst search.checker.%,%,$@))" PHONY += test ci.test test.shell -ci.test: test.yamllint test.pep8 test.pylint test.unit test.robot -test: test.yamllint test.pep8 test.pylint test.unit test.robot test.shell +ci.test: test.yamllint test.black test.pylint test.unit test.robot +test: test.yamllint test.black test.pylint test.unit test.robot test.shell test.shell: $(Q)shellcheck -x -s dash \ dockerfiles/docker-entrypoint.sh @@ -88,7 +88,8 @@ MANAGE += node.env node.clean MANAGE += py.build py.clean MANAGE += pyenv pyenv.install pyenv.uninstall MANAGE += pypi.upload pypi.upload.test -MANAGE += test.yamllint test.pylint test.pep8 test.unit test.coverage test.robot test.clean +MANAGE += format.python +MANAGE += test.yamllint test.pylint test.black test.unit test.coverage test.robot test.clean MANAGE += themes.all themes.oscar themes.simple themes.simple.test pygments.less MANAGE += static.build.commit static.build.drop static.build.restore MANAGE += nvm.install nvm.clean nvm.status nvm.nodejs diff --git a/manage b/manage index 8eb347f4f..bf202cb67 100755 --- a/manage +++ b/manage @@ -24,6 +24,8 @@ PY_SETUP_EXTRAS='[test]' GECKODRIVER_VERSION="v0.30.0" export NODE_MINIMUM_VERSION="16.13.0" # SPHINXOPTS= +BLACK_OPTIONS=("--target-version" "py37" "--line-length" "120" "--skip-string-normalization") +BLACK_TARGETS=("--exclude" "searx/static,searx/languages.py" "searx" "searxng_extra" "tests") pylint.FILES() { @@ -31,8 +33,7 @@ pylint.FILES() { # # # lint: pylint # - # These py files are linted by test.pylint(), all other files are linted by - # test.pep8() + # These py files are linted by test.pylint() grep -l -r --include \*.py '^#[[:blank:]]*lint:[[:blank:]]*pylint' searx searxng_extra tests } @@ -89,10 +90,12 @@ pyenv.: OK : test if virtualenv is OK pypi.upload: Upload python packages to PyPi (to test use pypi.upload.test) +format.: + python : format Python code source using black test.: yamllint : lint YAML files (YAMLLINT_FILES) pylint : lint PYLINT_FILES, searx/engines, searx & tests - pep8 : pycodestyle (pep8) for all files except PYLINT_FILES + black : check black code format unit : run unit tests coverage : run unit tests with coverage robot : run robot test @@ -617,6 +620,12 @@ pypi.upload.test() { pyenv.cmd twine upload -r testpypi "${PYDIST}"/* } +format.python() { + build_msg TEST "[format.python] black \$BLACK_TARGETS" + pyenv.cmd black "${BLACK_OPTIONS[@]}" "${BLACK_TARGETS[@]}" + dump_return $? +} + test.yamllint() { build_msg TEST "[yamllint] \$YAMLLINT_FILES" pyenv.cmd yamllint --format parsable "${YAMLLINT_FILES[@]}" @@ -646,15 +655,9 @@ test.pylint() { dump_return $? } -test.pep8() { - build_msg TEST 'pycodestyle (formerly pep8)' - local _exclude="" - printf -v _exclude '%s, ' "${PYLINT_FILES[@]}" - pyenv.cmd pycodestyle \ - --exclude="searx/static, searx/languages.py, $_exclude " \ - --max-line-length=120 \ - --ignore "E117,E252,E402,E722,E741,W503,W504,W605" \ - searx tests +test.black() { + build_msg TEST "[black] \$BLACK_TARGETS" + pyenv.cmd black --check --diff "${BLACK_OPTIONS[@]}" "${BLACK_TARGETS[@]}" dump_return $? } diff --git a/requirements-dev.txt b/requirements-dev.txt index c80afc460..0fef51f24 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,7 @@ mock==4.0.3 nose2[coverage_plugin]==0.10.0 cov-core==1.15.0 +black==21.12b0 pycodestyle==2.8.0 pylint==2.12.2 splinter==0.17.0 diff --git a/searx/__init__.py b/searx/__init__.py index b1626ae9f..d2d389ea9 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -29,6 +29,7 @@ if settings is not None: _unset = object() + def get_setting(name, default=_unset): """Returns the value to which ``name`` point. If there is no such name in the settings and the ``default`` is unset, a :py:obj:`KeyError` is raised. @@ -80,14 +81,9 @@ def logging_config_debug(): 'levelname': {'color': 8}, 'name': {'color': 8}, 'programname': {'color': 'cyan'}, - 'username': {'color': 'yellow'} + 'username': {'color': 'yellow'}, } - coloredlogs.install( - level=log_level, - level_styles=level_styles, - field_styles=field_styles, - fmt=LOG_FORMAT_DEBUG - ) + coloredlogs.install(level=log_level, level_styles=level_styles, field_styles=field_styles, fmt=LOG_FORMAT_DEBUG) else: logging.basicConfig(level=logging.getLevelName(log_level), format=LOG_FORMAT_DEBUG) diff --git a/searx/answerers/random/answerer.py b/searx/answerers/random/answerer.py index d5223e517..e6c383330 100644 --- a/searx/answerers/random/answerer.py +++ b/searx/answerers/random/answerer.py @@ -8,13 +8,12 @@ from flask_babel import gettext # specifies which search query keywords triggers this answerer keywords = ('random',) -random_int_max = 2**31 +random_int_max = 2 ** 31 random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase def random_characters(): - return [random.choice(random_string_letters) - for _ in range(random.randint(8, 32))] + return [random.choice(random_string_letters) for _ in range(random.randint(8, 32))] def random_string(): @@ -39,11 +38,13 @@ def random_uuid(): return str(uuid.uuid4()) -random_types = {'string': random_string, - 'int': random_int, - 'float': random_float, - 'sha256': random_sha256, - 'uuid': random_uuid} +random_types = { + 'string': random_string, + 'int': random_int, + 'float': random_float, + 'sha256': random_sha256, + 'uuid': random_uuid, +} # required answerer function @@ -62,6 +63,8 @@ def answer(query): # required answerer function # returns information about the answerer def self_info(): - return {'name': gettext('Random value generator'), - 'description': gettext('Generate different random values'), - 'examples': ['random {}'.format(x) for x in random_types]} + return { + 'name': gettext('Random value generator'), + 'description': gettext('Generate different random values'), + 'examples': ['random {}'.format(x) for x in random_types], + } diff --git a/searx/answerers/statistics/answerer.py b/searx/answerers/statistics/answerer.py index abd4be7f5..60f0d304f 100644 --- a/searx/answerers/statistics/answerer.py +++ b/searx/answerers/statistics/answerer.py @@ -4,11 +4,7 @@ from operator import mul from flask_babel import gettext -keywords = ('min', - 'max', - 'avg', - 'sum', - 'prod') +keywords = ('min', 'max', 'avg', 'sum', 'prod') # required answerer function @@ -47,6 +43,8 @@ def answer(query): # required answerer function # returns information about the answerer def self_info(): - return {'name': gettext('Statistics functions'), - 'description': gettext('Compute {functions} of the arguments').format(functions='/'.join(keywords)), - 'examples': ['avg 123 548 2.04 24.2']} + return { + 'name': gettext('Statistics functions'), + 'description': gettext('Compute {functions} of the arguments').format(functions='/'.join(keywords)), + 'examples': ['avg 123 548 2.04 24.2'], + } diff --git a/searx/autocomplete.py b/searx/autocomplete.py index a55377cd9..b8d272c32 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -120,14 +120,15 @@ def wikipedia(query, lang): return [] -backends = {'dbpedia': dbpedia, - 'duckduckgo': duckduckgo, - 'google': google, - 'startpage': startpage, - 'swisscows': swisscows, - 'qwant': qwant, - 'wikipedia': wikipedia - } +backends = { + 'dbpedia': dbpedia, + 'duckduckgo': duckduckgo, + 'google': google, + 'startpage': startpage, + 'swisscows': swisscows, + 'qwant': qwant, + 'wikipedia': wikipedia, +} def search_autocomplete(backend_name, query, lang): diff --git a/searx/data/__init__.py b/searx/data/__init__.py index 5937ea557..87bfb5477 100644 --- a/searx/data/__init__.py +++ b/searx/data/__init__.py @@ -23,10 +23,12 @@ from pathlib import Path data_dir = Path(__file__).parent + def _load(filename): with open(data_dir / filename, encoding='utf-8') as f: return json.load(f) + def ahmia_blacklist_loader(): """Load data from `ahmia_blacklist.txt` and return a list of MD5 values of onion names. The MD5 values are fetched by:: @@ -39,6 +41,7 @@ def ahmia_blacklist_loader(): with open(str(data_dir / 'ahmia_blacklist.txt'), encoding='utf-8') as f: return f.read().split() + ENGINES_LANGUAGES = _load('engines_languages.json') CURRENCIES = _load('currencies.json') USER_AGENTS = _load('useragents.json') diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py index e6a243596..730a4c445 100644 --- a/searx/engines/1337x.py +++ b/searx/engines/1337x.py @@ -43,11 +43,15 @@ def response(resp): filesize, filesize_multiplier = filesize_info.split() filesize = get_torrent_size(filesize, filesize_multiplier) - results.append({'url': href, - 'title': title, - 'seed': seed, - 'leech': leech, - 'filesize': filesize, - 'template': 'torrent.html'}) + results.append( + { + 'url': href, + 'title': title, + 'seed': seed, + 'leech': leech, + 'filesize': filesize, + 'template': 'torrent.html', + } + ) return results diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 44ea9a4bd..fa9749e9d 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -57,6 +57,7 @@ engine_shortcuts = {} """ + def load_engine(engine_data): """Load engine from ``engine_data``. @@ -166,20 +167,19 @@ def set_language_attributes(engine): # settings.yml if engine.language not in engine.supported_languages: raise ValueError( - "settings.yml - engine: '%s' / language: '%s' not supported" % ( - engine.name, engine.language )) + "settings.yml - engine: '%s' / language: '%s' not supported" % (engine.name, engine.language) + ) if isinstance(engine.supported_languages, dict): - engine.supported_languages = { - engine.language : engine.supported_languages[engine.language] - } + engine.supported_languages = {engine.language: engine.supported_languages[engine.language]} else: engine.supported_languages = [engine.language] # find custom aliases for non standard language codes for engine_lang in engine.supported_languages: iso_lang = match_language(engine_lang, BABEL_LANGS, fallback=None) - if (iso_lang + if ( + iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and iso_lang not in engine.supported_languages @@ -197,14 +197,12 @@ def set_language_attributes(engine): } engine.fetch_supported_languages = ( # pylint: disable=protected-access - lambda: engine._fetch_supported_languages( - get(engine.supported_languages_url, headers=headers)) + lambda: engine._fetch_supported_languages(get(engine.supported_languages_url, headers=headers)) ) def update_attributes_for_tor(engine): - if (settings['outgoing'].get('using_tor_proxy') - and hasattr(engine, 'onion_url') ): + if settings['outgoing'].get('using_tor_proxy') and hasattr(engine, 'onion_url'): engine.search_url = engine.onion_url + getattr(engine, 'search_path', '') engine.timeout += settings['outgoing'].get('extra_proxy_timeout', 0) @@ -217,9 +215,7 @@ def is_missing_required_attributes(engine): missing = False for engine_attr in dir(engine): if not engine_attr.startswith('_') and getattr(engine, engine_attr) is None: - logger.error( - 'Missing engine config attribute: "{0}.{1}"' - .format(engine.name, engine_attr)) + logger.error('Missing engine config attribute: "{0}.{1}"'.format(engine.name, engine_attr)) missing = True return missing @@ -230,8 +226,7 @@ def is_engine_active(engine): return False # exclude onion engines if not using tor - if ('onions' in engine.categories - and not settings['outgoing'].get('using_tor_proxy') ): + if 'onions' in engine.categories and not settings['outgoing'].get('using_tor_proxy'): return False return True @@ -253,8 +248,7 @@ def register_engine(engine): def load_engines(engine_list): - """usage: ``engine_list = settings['engines']`` - """ + """usage: ``engine_list = settings['engines']``""" engines.clear() engine_shortcuts.clear() categories.clear() diff --git a/searx/engines/ahmia.py b/searx/engines/ahmia.py index b9a0086bd..33e0cc393 100644 --- a/searx/engines/ahmia.py +++ b/searx/engines/ahmia.py @@ -25,9 +25,7 @@ page_size = 10 # search url search_url = 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion/search/?{query}' time_range_support = True -time_range_dict = {'day': 1, - 'week': 7, - 'month': 30} +time_range_dict = {'day': 1, 'week': 7, 'month': 30} # xpaths results_xpath = '//li[@class="result"]' @@ -54,7 +52,7 @@ def response(resp): # trim results so there's not way too many at once first_result_index = page_size * (resp.search_params.get('pageno', 1) - 1) all_results = eval_xpath_list(dom, results_xpath) - trimmed_results = all_results[first_result_index:first_result_index + page_size] + trimmed_results = all_results[first_result_index : first_result_index + page_size] # get results for result in trimmed_results: @@ -65,10 +63,7 @@ def response(resp): title = extract_text(eval_xpath(result, title_xpath)) content = extract_text(eval_xpath(result, content_xpath)) - results.append({'url': cleaned_url, - 'title': title, - 'content': content, - 'is_onion': True}) + results.append({'url': cleaned_url, 'title': title, 'content': content, 'is_onion': True}) # get spelling corrections for correction in eval_xpath_list(dom, correction_xpath): diff --git a/searx/engines/apkmirror.py b/searx/engines/apkmirror.py index 746a8cd9c..da84bc79e 100644 --- a/searx/engines/apkmirror.py +++ b/searx/engines/apkmirror.py @@ -35,8 +35,8 @@ search_url = base_url + '/?post_type=app_release&searchtype=apk&page={pageno}&{q def request(query, params): params['url'] = search_url.format( - pageno = params['pageno'], - query = urlencode({'s': query}), + pageno=params['pageno'], + query=urlencode({'s': query}), ) logger.debug("query_url --> %s", params['url']) return params @@ -55,11 +55,7 @@ def response(resp): url = base_url + link.attrib.get('href') + '#downloads' title = extract_text(link) img_src = base_url + eval_xpath_getindex(result, './/img/@src', 0) - res = { - 'url': url, - 'title': title, - 'img_src': img_src - } + res = {'url': url, 'title': title, 'img_src': img_src} results.append(res) diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py index aeac145d1..1cfb3983f 100644 --- a/searx/engines/archlinux.py +++ b/searx/engines/archlinux.py @@ -39,6 +39,7 @@ def locale_to_lang_code(locale): # wikis for some languages were moved off from the main site, we need to make # requests to correct URLs to be able to get results in those languages lang_urls = { + # fmt: off 'all': { 'base': 'https://wiki.archlinux.org', 'search': '/index.php?title=Special:Search&offset={offset}&{query}' @@ -63,6 +64,7 @@ lang_urls = { 'base': 'http://archtr.org/wiki', 'search': '/index.php?title=Özel:Ara&offset={offset}&{query}' } + # fmt: on } @@ -95,7 +97,7 @@ main_langs = { 'sl': 'Slovenský', 'th': 'ไทย', 'uk': 'Українська', - 'zh': '简体中文' + 'zh': '简体中文', } supported_languages = dict(lang_urls, **main_langs) @@ -139,7 +141,6 @@ def response(resp): href = urljoin(base_url, link.attrib.get('href')) title = extract_text(link) - results.append({'url': href, - 'title': title}) + results.append({'url': href, 'title': title}) return results diff --git a/searx/engines/artic.py b/searx/engines/artic.py index 104ab8839..c0ae0a5e7 100644 --- a/searx/engines/artic.py +++ b/searx/engines/artic.py @@ -27,19 +27,23 @@ nb_per_page = 20 search_api = 'https://api.artic.edu/api/v1/artworks/search?' image_api = 'https://www.artic.edu/iiif/2/' + def request(query, params): - args = urlencode({ - 'q' : query, - 'page' : params['pageno'], - 'fields' : 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles', - 'limit' : nb_per_page, - }) + args = urlencode( + { + 'q': query, + 'page': params['pageno'], + 'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles', + 'limit': nb_per_page, + } + ) params['url'] = search_api + args logger.debug("query_url --> %s", params['url']) return params + def response(resp): results = [] @@ -50,14 +54,16 @@ def response(resp): if not result['image_id']: continue - results.append({ - 'url': 'https://artic.edu/artworks/%(id)s' % result, - 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, - 'content': result['medium_display'], - 'author': ', '.join(result['artist_titles']), - 'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result, - 'img_format': result['dimensions'], - 'template': 'images.html' - }) + results.append( + { + 'url': 'https://artic.edu/artworks/%(id)s' % result, + 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, + 'content': result['medium_display'], + 'author': ', '.join(result['artist_titles']), + 'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result, + 'img_format': result['dimensions'], + 'template': 'images.html', + } + ) return results diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py index 09ea07ea5..a1a58172d 100644 --- a/searx/engines/arxiv.py +++ b/searx/engines/arxiv.py @@ -20,8 +20,9 @@ about = { categories = ['science'] paging = True -base_url = 'https://export.arxiv.org/api/query?search_query=all:'\ - + '{query}&start={offset}&max_results={number_of_results}' +base_url = ( + 'https://export.arxiv.org/api/query?search_query=all:' + '{query}&start={offset}&max_results={number_of_results}' +) # engine dependent config number_of_results = 10 @@ -31,9 +32,7 @@ def request(query, params): # basic search offset = (params['pageno'] - 1) * number_of_results - string_args = dict(query=query, - offset=offset, - number_of_results=number_of_results) + string_args = dict(query=query, offset=offset, number_of_results=number_of_results) params['url'] = base_url.format(**string_args) @@ -65,10 +64,7 @@ def response(resp): publishedDate = datetime.strptime(eval_xpath_getindex(entry, './/published', 0).text, '%Y-%m-%dT%H:%M:%SZ') - res_dict = {'url': url, - 'title': title, - 'publishedDate': publishedDate, - 'content': content} + res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content} results.append(res_dict) diff --git a/searx/engines/bandcamp.py b/searx/engines/bandcamp.py index 62745243f..ba951a393 100644 --- a/searx/engines/bandcamp.py +++ b/searx/engines/bandcamp.py @@ -44,9 +44,7 @@ def request(query, params): pageno : 1 # number of the requested page ''' - search_path = search_string.format( - query=urlencode({'q': query}), - page=params['pageno']) + search_path = search_string.format(query=urlencode({'q': query}), page=params['pageno']) params['url'] = base_url + search_path diff --git a/searx/engines/base.py b/searx/engines/base.py index 463274681..5a2d66619 100755 --- a/searx/engines/base.py +++ b/searx/engines/base.py @@ -21,8 +21,10 @@ about = { categories = ['science'] -base_url = 'https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi'\ - + '?func=PerformSearch&{query}&boost=oa&hits={hits}&offset={offset}' +base_url = ( + 'https://api.base-search.net/cgi-bin/BaseHttpSearchInterface.fcgi' + + '?func=PerformSearch&{query}&boost=oa&hits={hits}&offset={offset}' +) # engine dependent config paging = True @@ -47,7 +49,7 @@ shorcut_dict = { 'source:': 'dcsource:', 'subject:': 'dcsubject:', 'title:': 'dctitle:', - 'type:': 'dcdctype:' + 'type:': 'dcdctype:', } @@ -59,9 +61,7 @@ def request(query, params): # basic search offset = (params['pageno'] - 1) * number_of_results - string_args = dict(query=urlencode({'query': query}), - offset=offset, - hits=number_of_results) + string_args = dict(query=urlencode({'query': query}), offset=offset, hits=number_of_results) params['url'] = base_url.format(**string_args) @@ -93,7 +93,7 @@ def response(resp): if len(item.text) > 300: content += "..." -# dates returned by the BASE API are not several formats + # dates returned by the BASE API are not several formats publishedDate = None for date_format in ['%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%d', '%Y-%m', '%Y']: try: @@ -103,14 +103,9 @@ def response(resp): pass if publishedDate is not None: - res_dict = {'url': url, - 'title': title, - 'publishedDate': publishedDate, - 'content': content} + res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content} else: - res_dict = {'url': url, - 'title': title, - 'content': content} + res_dict = {'url': url, 'title': title, 'content': content} results.append(res_dict) diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 3917e54c1..59fc22be4 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -36,9 +36,11 @@ inital_query = 'search?{query}&search=&form=QBLH' # following queries: https://www.bing.com/search?q=foo&search=&first=11&FORM=PERE page_query = 'search?{query}&search=&first={offset}&FORM=PERE' + def _get_offset_from_pageno(pageno): return (pageno - 1) * 10 + 1 + def request(query, params): offset = _get_offset_from_pageno(params.get('pageno', 1)) @@ -53,30 +55,23 @@ def request(query, params): if params['language'] == 'all': lang = 'EN' else: - lang = match_language( - params['language'], supported_languages, language_aliases - ) + lang = match_language(params['language'], supported_languages, language_aliases) - query = 'language:{} {}'.format( - lang.split('-')[0].upper(), query - ) + query = 'language:{} {}'.format(lang.split('-')[0].upper(), query) - search_path = search_string.format( - query = urlencode({'q': query}), - offset = offset) + search_path = search_string.format(query=urlencode({'q': query}), offset=offset) if offset > 1: - referer = base_url + inital_query.format(query = urlencode({'q': query})) + referer = base_url + inital_query.format(query=urlencode({'q': query})) params['headers']['Referer'] = referer - logger.debug("headers.Referer --> %s", referer ) + logger.debug("headers.Referer --> %s", referer) params['url'] = base_url + search_path params['headers']['Accept-Language'] = "en-US,en;q=0.5" - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' return params + def response(resp): results = [] @@ -87,7 +82,7 @@ def response(resp): for result in eval_xpath(dom, '//div[@class="sa_cc"]'): # IMO //div[@class="sa_cc"] does no longer match - logger.debug('found //div[@class="sa_cc"] --> %s', result) + logger.debug('found //div[@class="sa_cc"] --> %s', result) link = eval_xpath(result, './/h3/a')[0] url = link.attrib.get('href') @@ -95,11 +90,7 @@ def response(resp): content = extract_text(eval_xpath(result, './/p')) # append result - results.append({ - 'url': url, - 'title': title, - 'content': content - }) + results.append({'url': url, 'title': title, 'content': content}) # parse results again if nothing is found yet for result in eval_xpath(dom, '//li[@class="b_algo"]'): @@ -110,18 +101,14 @@ def response(resp): content = extract_text(eval_xpath(result, './/p')) # append result - results.append({ - 'url': url, - 'title': title, - 'content': content - }) + results.append({'url': url, 'title': title, 'content': content}) try: result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]//text()')) if "-" in result_len_container: # Remove the part "from-to" for paginated request ... - result_len_container = result_len_container[result_len_container.find("-") * 2 + 2:] + result_len_container = result_len_container[result_len_container.find("-") * 2 + 2 :] result_len_container = re.sub('[^0-9]', '', result_len_container) diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 4bee9bc7d..73b61b896 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -6,10 +6,13 @@ from urllib.parse import urlencode from lxml import html from json import loads -from searx.utils import match_language +from searx.utils import match_language from searx.engines.bing import language_aliases -from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +from searx.engines.bing import ( # pylint: disable=unused-import + _fetch_supported_languages, + supported_languages_url, +) # about about = { @@ -31,39 +34,33 @@ number_of_results = 28 # search-url base_url = 'https://www.bing.com/' -search_string = 'images/search'\ - '?{query}'\ - '&count={count}'\ - '&first={first}'\ +search_string = ( + # fmt: off + 'images/search' + '?{query}' + '&count={count}' + '&first={first}' '&tsc=ImageHoverTitle' + # fmt: on +) time_range_string = '&qft=+filterui:age-lt{interval}' -time_range_dict = {'day': '1440', - 'week': '10080', - 'month': '43200', - 'year': '525600'} +time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'} # safesearch definitions -safesearch_types = {2: 'STRICT', - 1: 'DEMOTE', - 0: 'OFF'} +safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} # do search-request def request(query, params): offset = ((params['pageno'] - 1) * number_of_results) + 1 - search_path = search_string.format( - query=urlencode({'q': query}), - count=number_of_results, - first=offset) + search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset) language = match_language(params['language'], supported_languages, language_aliases).lower() - params['cookies']['SRCHHPGUSR'] = \ - 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') + params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') - params['cookies']['_EDGE_S'] = 'mkt=' + language +\ - '&ui=' + language + '&F=1' + params['cookies']['_EDGE_S'] = 'mkt=' + language + '&ui=' + language + '&F=1' params['url'] = base_url + search_path if params['time_range'] in time_range_dict: @@ -92,14 +89,18 @@ def response(resp): # strip 'Unicode private use area' highlighting, they render to Tux # the Linux penguin and a standing diamond on my machine... title = m.get('t', '').replace('\ue000', '').replace('\ue001', '') - results.append({'template': 'images.html', - 'url': m['purl'], - 'thumbnail_src': m['turl'], - 'img_src': m['murl'], - 'content': '', - 'title': title, - 'source': source, - 'img_format': img_format}) + results.append( + { + 'template': 'images.html', + 'url': m['purl'], + 'thumbnail_src': m['turl'], + 'img_src': m['murl'], + 'content': '', + 'title': title, + 'source': source, + 'img_format': img_format, + } + ) except: continue diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index c2515385c..22856541b 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -13,10 +13,7 @@ from datetime import datetime from dateutil import parser from lxml import etree from lxml.etree import XPath -from searx.utils import ( - match_language, - eval_xpath_getindex -) +from searx.utils import match_language, eval_xpath_getindex from searx.engines.bing import ( # pylint: disable=unused-import language_aliases, _fetch_supported_languages, @@ -42,11 +39,8 @@ time_range_support = True base_url = 'https://www.bing.com/' search_string = 'news/search?{query}&first={offset}&format=RSS' search_string_with_time = 'news/search?{query}&first={offset}&qft=interval%3d"{interval}"&format=RSS' -time_range_dict = { - 'day': '7', - 'week': '8', - 'month': '9' -} +time_range_dict = {'day': '7', 'week': '8', 'month': '9'} + def url_cleanup(url_string): """remove click""" @@ -57,6 +51,7 @@ def url_cleanup(url_string): url_string = query.get('url', None) return url_string + def image_url_cleanup(url_string): """replace the http://*bing.com/th?id=... by https://www.bing.com/th?id=...""" @@ -66,27 +61,33 @@ def image_url_cleanup(url_string): url_string = "https://www.bing.com/th?id=" + quote(query.get('id')) return url_string + def _get_url(query, language, offset, time_range): if time_range in time_range_dict: search_path = search_string_with_time.format( + # fmt: off query = urlencode({ 'q': query, 'setmkt': language }), offset = offset, interval = time_range_dict[time_range] + # fmt: on ) else: # e.g. setmkt=de-de&setlang=de search_path = search_string.format( + # fmt: off query = urlencode({ 'q': query, 'setmkt': language }), offset = offset + # fmt: on ) return base_url + search_path + def request(query, params): if params['time_range'] and params['time_range'] not in time_range_dict: @@ -101,6 +102,7 @@ def request(query, params): return params + def response(resp): results = [] @@ -123,26 +125,16 @@ def response(resp): publishedDate = datetime.now() # thumbnail - thumbnail = eval_xpath_getindex( - item, XPath('./News:Image/text()', namespaces=namespaces), 0, default=None) + thumbnail = eval_xpath_getindex(item, XPath('./News:Image/text()', namespaces=namespaces), 0, default=None) if thumbnail is not None: thumbnail = image_url_cleanup(thumbnail) # append result if thumbnail is not None: - results.append({ - 'url': url, - 'title': title, - 'publishedDate': publishedDate, - 'content': content, - 'img_src': thumbnail - }) + results.append( + {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content, 'img_src': thumbnail} + ) else: - results.append({ - 'url': url, - 'title': title, - 'publishedDate': publishedDate, - 'content': content - }) + results.append({'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content}) return results diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index 2e1f13de2..7f8820546 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -6,12 +6,15 @@ from json import loads from lxml import html from urllib.parse import urlencode + from searx.utils import match_language - from searx.engines.bing import language_aliases -from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import -# about +from searx.engines.bing import ( # pylint: disable=unused-import + _fetch_supported_languages, + supported_languages_url, +) + about = { "website": 'https://www.bing.com/videos', "wikidata_id": 'Q4914152', @@ -28,36 +31,31 @@ time_range_support = True number_of_results = 28 base_url = 'https://www.bing.com/' -search_string = 'videos/search'\ - '?{query}'\ - '&count={count}'\ - '&first={first}'\ - '&scope=video'\ +search_string = ( + # fmt: off + 'videos/search' + '?{query}' + '&count={count}' + '&first={first}' + '&scope=video' '&FORM=QBLH' + # fmt: on +) time_range_string = '&qft=+filterui:videoage-lt{interval}' -time_range_dict = {'day': '1440', - 'week': '10080', - 'month': '43200', - 'year': '525600'} +time_range_dict = {'day': '1440', 'week': '10080', 'month': '43200', 'year': '525600'} # safesearch definitions -safesearch_types = {2: 'STRICT', - 1: 'DEMOTE', - 0: 'OFF'} +safesearch_types = {2: 'STRICT', 1: 'DEMOTE', 0: 'OFF'} # do search-request def request(query, params): offset = ((params['pageno'] - 1) * number_of_results) + 1 - search_path = search_string.format( - query=urlencode({'q': query}), - count=number_of_results, - first=offset) + search_path = search_string.format(query=urlencode({'q': query}), count=number_of_results, first=offset) # safesearch cookie - params['cookies']['SRCHHPGUSR'] = \ - 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') + params['cookies']['SRCHHPGUSR'] = 'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE') # language cookie language = match_language(params['language'], supported_languages, language_aliases).lower() @@ -89,11 +87,15 @@ def response(resp): info = ' - '.join(result.xpath('.//div[@class="mc_vtvc_meta_block"]//span/text()')).strip() content = '{0} - {1}'.format(metadata['du'], info) thumbnail = '{0}th?id={1}'.format(base_url, metadata['thid']) - results.append({'url': metadata['murl'], - 'thumbnail': thumbnail, - 'title': metadata.get('vt', ''), - 'content': content, - 'template': 'videos.html'}) + results.append( + { + 'url': metadata['murl'], + 'thumbnail': thumbnail, + 'title': metadata.get('vt', ''), + 'content': content, + 'template': 'videos.html', + } + ) except: continue diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index cda9e9355..c5dd92105 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -11,10 +11,7 @@ from searx.utils import extract_text, get_torrent_size about = { "website": 'https://btdig.com', "wikidata_id": 'Q4836698', - "official_api_documentation": { - 'url': 'https://btdig.com/contacts', - 'comment': 'on demand' - }, + "official_api_documentation": {'url': 'https://btdig.com/contacts', 'comment': 'on demand'}, "use_official_api": False, "require_api_key": False, "results": 'HTML', @@ -31,8 +28,7 @@ search_url = url + '/search?q={search_term}&p={pageno}' # do search-request def request(query, params): - params['url'] = search_url.format(search_term=quote(query), - pageno=params['pageno'] - 1) + params['url'] = search_url.format(search_term=quote(query), pageno=params['pageno'] - 1) return params @@ -77,13 +73,17 @@ def response(resp): magnetlink = result.xpath('.//div[@class="torrent_magnet"]//a')[0].attrib['href'] # append result - results.append({'url': href, - 'title': title, - 'content': content, - 'filesize': filesize, - 'files': files, - 'magnetlink': magnetlink, - 'template': 'torrent.html'}) + results.append( + { + 'url': href, + 'title': title, + 'content': content, + 'filesize': filesize, + 'files': files, + 'magnetlink': magnetlink, + 'template': 'torrent.html', + } + ) # return results sorted by seeder return results diff --git a/searx/engines/ccengine.py b/searx/engines/ccengine.py index 6f3a5adb7..93ac30c86 100644 --- a/searx/engines/ccengine.py +++ b/searx/engines/ccengine.py @@ -29,10 +29,7 @@ search_string = '&page={page}&page_size={nb_per_page}&format=json&{query}' def request(query, params): - search_path = search_string.format( - query=urlencode({'q': query}), - nb_per_page=nb_per_page, - page=params['pageno']) + search_path = search_string.format(query=urlencode({'q': query}), nb_per_page=nb_per_page, page=params['pageno']) params['url'] = base_url + search_path @@ -45,9 +42,13 @@ def response(resp): json_data = loads(resp.text) for result in json_data['results']: - results.append({'url': result['foreign_landing_url'], - 'title': result['title'], - 'img_src': result['url'], - 'template': 'images.html'}) + results.append( + { + 'url': result['foreign_landing_url'], + 'title': result['title'], + 'img_src': result['url'], + 'template': 'images.html', + } + ) return results diff --git a/searx/engines/command.py b/searx/engines/command.py index aca379c67..abd29e2a5 100644 --- a/searx/engines/command.py +++ b/searx/engines/command.py @@ -138,7 +138,7 @@ def __check_query_params(params): def check_parsing_options(engine_settings): - """ Checks if delimiter based parsing or regex parsing is configured correctly """ + """Checks if delimiter based parsing or regex parsing is configured correctly""" if 'delimiter' not in engine_settings and 'parse_regex' not in engine_settings: raise ValueError('failed to init settings for parsing lines: missing delimiter or parse_regex') @@ -151,7 +151,7 @@ def check_parsing_options(engine_settings): def __parse_single_result(raw_result): - """ Parses command line output based on configuration """ + """Parses command line output based on configuration""" result = {} @@ -167,6 +167,6 @@ def __parse_single_result(raw_result): found = regex.search(raw_result) if not found: return {} - result[result_key] = raw_result[found.start():found.end()] + result[result_key] = raw_result[found.start() : found.end()] return result diff --git a/searx/engines/core.py b/searx/engines/core.py index e83c8bbe9..1fcb68f1f 100644 --- a/searx/engines/core.py +++ b/searx/engines/core.py @@ -28,22 +28,24 @@ api_key = 'unset' base_url = 'https://core.ac.uk:443/api-v2/search/' search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}' + def request(query, params): if api_key == 'unset': raise SearxEngineAPIException('missing CORE API key') search_path = search_string.format( - query = urlencode({'q': query}), - nb_per_page = nb_per_page, - page = params['pageno'], - apikey = api_key, + query=urlencode({'q': query}), + nb_per_page=nb_per_page, + page=params['pageno'], + apikey=api_key, ) params['url'] = base_url + search_path logger.debug("query_url --> %s", params['url']) return params + def response(resp): results = [] json_data = loads(resp.text) @@ -52,7 +54,7 @@ def response(resp): source = result['_source'] time = source['publishedDate'] or source['depositedDate'] - if time : + if time: date = datetime.fromtimestamp(time / 1000) else: date = None @@ -66,12 +68,14 @@ def response(resp): metadata.append(source['doi']) metadata = ' / '.join(metadata) - results.append({ - 'url': source['urls'][0].replace('http://', 'https://', 1), - 'title': source['title'], - 'content': source['description'], - 'publishedDate': date, - 'metadata' : metadata, - }) + results.append( + { + 'url': source['urls'][0].replace('http://', 'https://', 1), + 'title': source['title'], + 'content': source['description'], + 'publishedDate': date, + 'metadata': metadata, + } + ) return results diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index d4c3b5f81..969688126 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -30,7 +30,7 @@ def request(query, params): def response(resp): """remove first and last lines to get only json""" - json_resp = resp.text[resp.text.find('\n') + 1:resp.text.rfind('\n') - 2] + json_resp = resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2] results = [] try: conversion_rate = float(json.loads(json_resp)['conversion']['converted-amount']) @@ -47,7 +47,8 @@ def response(resp): ) url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'.format( - resp.search_params['from'].upper(), resp.search_params['to']) + resp.search_params['from'].upper(), resp.search_params['to'] + ) results.append({'answer': answer, 'url': url}) diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 92d368c11..5607691a4 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -25,8 +25,10 @@ paging = True # search-url # see http://www.dailymotion.com/doc/api/obj-video.html search_url = 'https://api.dailymotion.com/videos?fields=created_time,title,description,duration,url,thumbnail_360_url,id&sort=relevance&limit=5&page={pageno}&{query}' # noqa -embedded_url = '' +embedded_url = ( + '' +) supported_languages_url = 'https://api.dailymotion.com/languages' @@ -39,8 +41,8 @@ def request(query, params): locale = match_language(params['language'], supported_languages) params['url'] = search_url.format( - query=urlencode({'search': query, 'localization': locale}), - pageno=params['pageno']) + query=urlencode({'search': query, 'localization': locale}), pageno=params['pageno'] + ) return params @@ -67,13 +69,17 @@ def response(resp): # http to https thumbnail = thumbnail.replace("http://", "https://") - results.append({'template': 'videos.html', - 'url': url, - 'title': title, - 'content': content, - 'publishedDate': publishedDate, - 'embedded': embedded, - 'thumbnail': thumbnail}) + results.append( + { + 'template': 'videos.html', + 'url': url, + 'title': title, + 'content': content, + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) # return results return results diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py index 946bd3ebe..220ac599d 100644 --- a/searx/engines/deezer.py +++ b/searx/engines/deezer.py @@ -24,9 +24,11 @@ paging = True url = 'https://api.deezer.com/' search_url = url + 'search?{query}&index={offset}' -embedded_url = '' +embedded_url = ( + '' +) # do search-request @@ -53,18 +55,12 @@ def response(resp): if url.startswith('http://'): url = 'https' + url[4:] - content = '{} - {} - {}'.format( - result['artist']['name'], - result['album']['title'], - result['title']) + content = '{} - {} - {}'.format(result['artist']['name'], result['album']['title'], result['title']) embedded = embedded_url.format(audioid=result['id']) # append result - results.append({'url': url, - 'title': title, - 'embedded': embedded, - 'content': content}) + results.append({'url': url, 'title': title, 'embedded': embedded, 'content': content}) # return results return results diff --git a/searx/engines/demo_offline.py b/searx/engines/demo_offline.py index a4a632180..aeb74f443 100644 --- a/searx/engines/demo_offline.py +++ b/searx/engines/demo_offline.py @@ -31,6 +31,7 @@ about = { # if there is a need for globals, use a leading underline _my_offline_engine = None + def init(engine_settings=None): """Initialization of the (offline) engine. The origin of this demo engine is a simple json string which is loaded in this example while the engine is @@ -44,11 +45,10 @@ def init(engine_settings=None): ', {"value":"first item"}' ', {"value":"second item"}' ', {"value":"third item"}' - ']' - - % engine_settings.get('name') + ']' % engine_settings.get('name') ) + def search(query, request_params): """Query (offline) engine and return results. Assemble the list of results from your local engine. In this demo engine we ignore the 'query' term, usual @@ -62,11 +62,11 @@ def search(query, request_params): for row in result_list: entry = { - 'query' : query, - 'language' : request_params['language'], - 'value' : row.get("value"), + 'query': query, + 'language': request_params['language'], + 'value': row.get("value"), # choose a result template or comment out to use the *default* - 'template' : 'key-value.html', + 'template': 'key-value.html', } ret_val.append(entry) diff --git a/searx/engines/demo_online.py b/searx/engines/demo_online.py index a0f736e42..e53b3c15e 100644 --- a/searx/engines/demo_online.py +++ b/searx/engines/demo_online.py @@ -43,6 +43,7 @@ about = { # if there is a need for globals, use a leading underline _my_online_engine = None + def init(engine_settings): """Initialization of the (online) engine. If no initialization is needed, drop this init function. @@ -51,20 +52,24 @@ def init(engine_settings): global _my_online_engine # pylint: disable=global-statement _my_online_engine = engine_settings.get('name') + def request(query, params): """Build up the ``params`` for the online request. In this example we build a URL to fetch images from `artic.edu `__ """ - args = urlencode({ - 'q' : query, - 'page' : params['pageno'], - 'fields' : 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles', - 'limit' : page_size, - }) + args = urlencode( + { + 'q': query, + 'page': params['pageno'], + 'fields': 'id,title,artist_display,medium_display,image_id,date_display,dimensions,artist_titles', + 'limit': page_size, + } + ) params['url'] = search_api + args return params + def response(resp): """Parse out the result items from the response. In this example we parse the response from `api.artic.edu `__ and filter out all @@ -79,14 +84,16 @@ def response(resp): if not result['image_id']: continue - results.append({ - 'url': 'https://artic.edu/artworks/%(id)s' % result, - 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, - 'content': result['medium_display'], - 'author': ', '.join(result['artist_titles']), - 'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result, - 'img_format': result['dimensions'], - 'template': 'images.html' - }) + results.append( + { + 'url': 'https://artic.edu/artworks/%(id)s' % result, + 'title': result['title'] + " (%(date_display)s) // %(artist_display)s" % result, + 'content': result['medium_display'], + 'author': ', '.join(result['artist_titles']), + 'img_src': image_api + '/%(image_id)s/full/843,/0/default.jpg' % result, + 'img_format': result['dimensions'], + 'template': 'images.html', + } + ) return results diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index b13d54dd5..e44ac28e5 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -32,13 +32,14 @@ time_range_dict = { # search-url base_url = 'https://www.deviantart.com' + def request(query, params): # https://www.deviantart.com/search/deviations?page=5&q=foo - query = { - 'page' : params['pageno'], - 'q' : query, + query = { + 'page': params['pageno'], + 'q': query, } if params['time_range'] in time_range_dict: query['order'] = time_range_dict[params['time_range']] @@ -47,6 +48,7 @@ def request(query, params): return params + def response(resp): results = [] @@ -67,11 +69,13 @@ def response(resp): continue img_tag = img_tag[0] - results.append({ - 'template': 'images.html', - 'url': a_tag.attrib.get('href'), - 'img_src': img_tag.attrib.get('src'), - 'title': img_tag.attrib.get('alt'), - }) + results.append( + { + 'template': 'images.html', + 'url': a_tag.attrib.get('href'), + 'img_src': img_tag.attrib.get('src'), + 'title': img_tag.attrib.get('alt'), + } + ) return results diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py index 4a92a22c3..126e75374 100644 --- a/searx/engines/dictzone.py +++ b/searx/engines/dictzone.py @@ -27,9 +27,7 @@ https_support = True def request(query, params): - params['url'] = url.format(from_lang=params['from_lang'][2], - to_lang=params['to_lang'][2], - query=params['query']) + params['url'] = url.format(from_lang=params['from_lang'][2], to_lang=params['to_lang'][2], query=params['query']) return params @@ -51,10 +49,12 @@ def response(resp): if t.strip(): to_results.append(to_result.text_content()) - results.append({ - 'url': urljoin(str(resp.url), '?%d' % k), - 'title': from_result.text_content(), - 'content': '; '.join(to_results) - }) + results.append( + { + 'url': urljoin(str(resp.url), '?%d' % k), + 'title': from_result.text_content(), + 'content': '; '.join(to_results), + } + ) return results diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py index 109662a49..2914e9228 100644 --- a/searx/engines/digbt.py +++ b/searx/engines/digbt.py @@ -48,13 +48,17 @@ def response(resp): filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER]) magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0] - results.append({'url': url, - 'title': title, - 'content': content, - 'filesize': filesize, - 'magnetlink': magnetlink, - 'seed': 'N/A', - 'leech': 'N/A', - 'template': 'torrent.html'}) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'filesize': filesize, + 'magnetlink': magnetlink, + 'seed': 'N/A', + 'leech': 'N/A', + 'template': 'torrent.html', + } + ) return results diff --git a/searx/engines/docker_hub.py b/searx/engines/docker_hub.py index e69f677b3..1e492b196 100644 --- a/searx/engines/docker_hub.py +++ b/searx/engines/docker_hub.py @@ -9,13 +9,13 @@ from urllib.parse import urlencode from dateutil import parser about = { - "website": 'https://hub.docker.com', - "wikidata_id": 'Q100769064', - "official_api_documentation": 'https://docs.docker.com/registry/spec/api/', - "use_official_api": True, - "require_api_key": False, - "results": 'JSON', - } + "website": 'https://hub.docker.com', + "wikidata_id": 'Q100769064', + "official_api_documentation": 'https://docs.docker.com/registry/spec/api/', + "use_official_api": True, + "require_api_key": False, + "results": 'JSON', +} categories = ['it'] # optional paging = True @@ -23,6 +23,7 @@ paging = True base_url = "https://hub.docker.com/" search_url = base_url + "api/content/v1/products/search?{query}&type=image&page_size=25" + def request(query, params): params['url'] = search_url.format(query=urlencode(dict(q=query, page=params["pageno"]))) @@ -30,6 +31,7 @@ def request(query, params): return params + def response(resp): '''post-response callback resp: requests response object @@ -53,12 +55,8 @@ def response(resp): result["url"] = base_url + "r/" + item.get('slug', "") result["title"] = item.get("name") result["content"] = item.get("short_description") - result["publishedDate"] = parser.parse( - item.get("updated_at") or item.get("created_at") - ) - result["thumbnail"] = ( - item["logo_url"].get("large") or item["logo_url"].get("small") - ) + result["publishedDate"] = parser.parse(item.get("updated_at") or item.get("created_at")) + result["thumbnail"] = item["logo_url"].get("large") or item["logo_url"].get("small") results.append(result) return results diff --git a/searx/engines/doku.py b/searx/engines/doku.py index cf38b3b9a..08f56bbe7 100644 --- a/searx/engines/doku.py +++ b/searx/engines/doku.py @@ -25,17 +25,20 @@ number_of_results = 5 # search-url # Doku is OpenSearch compatible base_url = 'http://localhost:8090' -search_url = '/?do=search'\ - '&{query}' -# TODO '&startRecord={offset}'\ -# TODO '&maximumRecords={limit}'\ +search_url = ( + # fmt: off + '/?do=search' + '&{query}' + # fmt: on +) +# TODO '&startRecord={offset}' +# TODO '&maximumRecords={limit}' # do search-request def request(query, params): - params['url'] = base_url +\ - search_url.format(query=urlencode({'id': query})) + params['url'] = base_url + search_url.format(query=urlencode({'id': query})) return params @@ -60,9 +63,7 @@ def response(resp): title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title')) # append result - results.append({'title': title, - 'content': "", - 'url': base_url + res_url}) + results.append({'title': title, 'content': "", 'url': base_url + res_url}) # Search results for r in eval_xpath(doc, '//dl[@class="search_results"]/*'): @@ -74,9 +75,7 @@ def response(resp): content = extract_text(eval_xpath(r, '.')) # append result - results.append({'title': title, - 'content': content, - 'url': base_url + res_url}) + results.append({'title': title, 'content': content, 'url': base_url + res_url}) except: continue diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index d283af81d..0d2a524df 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -39,15 +39,10 @@ language_aliases = { 'ko': 'kr-KR', 'sl-SI': 'sl-SL', 'zh-TW': 'tzh-TW', - 'zh-HK': 'tzh-HK' + 'zh-HK': 'tzh-HK', } -time_range_dict = { - 'day': 'd', - 'week': 'w', - 'month': 'm', - 'year': 'y' -} +time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} # search-url url = 'https://lite.duckduckgo.com/lite' @@ -118,6 +113,7 @@ def request(query, params): logger.debug("param cookies: %s", params['cookies']) return params + # get response from search-request def response(resp): @@ -163,21 +159,24 @@ def response(resp): if td_content is None: continue - results.append({ - 'title': a_tag.text_content(), - 'content': extract_text(td_content), - 'url': a_tag.get('href'), - }) + results.append( + { + 'title': a_tag.text_content(), + 'content': extract_text(td_content), + 'url': a_tag.get('href'), + } + ) return results + # get supported languages from their site def _fetch_supported_languages(resp): # response is a js file with regions as an embedded object response_page = resp.text - response_page = response_page[response_page.find('regions:{') + 8:] - response_page = response_page[:response_page.find('}') + 1] + response_page = response_page[response_page.find('regions:{') + 8 :] + response_page = response_page[: response_page.find('}') + 1] regions_json = loads(response_page) supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys()) diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 3ef043964..ad3c92169 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -10,7 +10,10 @@ from lxml import html from searx.data import WIKIDATA_UNITS from searx.engines.duckduckgo import language_aliases -from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +from searx.engines.duckduckgo import ( # pylint: disable=unused-import + _fetch_supported_languages, + supported_languages_url, +) from searx.utils import extract_text, html_to_text, match_language, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom @@ -24,19 +27,15 @@ about = { "results": 'JSON', } -URL = 'https://api.duckduckgo.com/'\ - + '?{query}&format=json&pretty=0&no_redirect=1&d=1' +URL = 'https://api.duckduckgo.com/' + '?{query}&format=json&pretty=0&no_redirect=1&d=1' -WIKIDATA_PREFIX = [ - 'http://www.wikidata.org/entity/', - 'https://www.wikidata.org/entity/' -] +WIKIDATA_PREFIX = ['http://www.wikidata.org/entity/', 'https://www.wikidata.org/entity/'] replace_http_by_https = get_string_replaces_function({'http:': 'https:'}) def is_broken_text(text): - """ duckduckgo may return something like "http://somewhere Related website" + """duckduckgo may return something like "http://somewhere Related website" The href URL is broken, the "Related website" may contains some HTML. @@ -61,11 +60,7 @@ def result_to_text(text, htmlResult): def request(query, params): params['url'] = URL.format(query=urlencode({'q': query})) - language = match_language( - params['language'], - supported_languages, - language_aliases - ) + language = match_language(params['language'], supported_languages, language_aliases) language = language.split('-')[0] params['headers']['Accept-Language'] = language return params @@ -127,23 +122,14 @@ def response(resp): firstURL = ddg_result.get('FirstURL') text = ddg_result.get('Text') if not is_broken_text(text): - suggestion = result_to_text( - text, - ddg_result.get('Result') - ) + suggestion = result_to_text(text, ddg_result.get('Result')) if suggestion != heading and suggestion is not None: results.append({'suggestion': suggestion}) elif 'Topics' in ddg_result: suggestions = [] - relatedTopics.append({ - 'name': ddg_result.get('Name', ''), - 'suggestions': suggestions - }) + relatedTopics.append({'name': ddg_result.get('Name', ''), 'suggestions': suggestions}) for topic_result in ddg_result.get('Topics', []): - suggestion = result_to_text( - topic_result.get('Text'), - topic_result.get('Result') - ) + suggestion = result_to_text(topic_result.get('Text'), topic_result.get('Result')) if suggestion != heading and suggestion is not None: suggestions.append(suggestion) @@ -152,25 +138,15 @@ def response(resp): if abstractURL != '': # add as result ? problem always in english infobox_id = abstractURL - urls.append({ - 'title': search_res.get('AbstractSource'), - 'url': abstractURL, - 'official': True - }) - results.append({ - 'url': abstractURL, - 'title': heading - }) + urls.append({'title': search_res.get('AbstractSource'), 'url': abstractURL, 'official': True}) + results.append({'url': abstractURL, 'title': heading}) # definition definitionURL = search_res.get('DefinitionURL', '') if definitionURL != '': # add as result ? as answer ? problem always in english infobox_id = definitionURL - urls.append({ - 'title': search_res.get('DefinitionSource'), - 'url': definitionURL - }) + urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL}) # to merge with wikidata's infobox if infobox_id: @@ -198,10 +174,7 @@ def response(resp): # * netflix_id external_url = get_external_url(data_type, data_value) if external_url is not None: - urls.append({ - 'title': data_label, - 'url': external_url - }) + urls.append({'title': data_label, 'url': external_url}) elif data_type in ['instance', 'wiki_maps_trigger', 'google_play_artist_id']: # ignore instance: Wikidata value from "Instance Of" (Qxxxx) # ignore wiki_maps_trigger: reference to a javascript @@ -211,11 +184,7 @@ def response(resp): # There is already an URL for the website pass elif data_type == 'area': - attributes.append({ - 'label': data_label, - 'value': area_to_str(data_value), - 'entity': 'P2046' - }) + attributes.append({'label': data_label, 'value': area_to_str(data_value), 'entity': 'P2046'}) osm_zoom = area_to_osm_zoom(data_value.get('amount')) elif data_type == 'coordinates': if data_value.get('globe') == 'http://www.wikidata.org/entity/Q2': @@ -224,16 +193,9 @@ def response(resp): coordinates = info else: # coordinate NOT on Earth - attributes.append({ - 'label': data_label, - 'value': data_value, - 'entity': 'P625' - }) + attributes.append({'label': data_label, 'value': data_value, 'entity': 'P625'}) elif data_type == 'string': - attributes.append({ - 'label': data_label, - 'value': data_value - }) + attributes.append({'label': data_label, 'value': data_value}) if coordinates: data_label = coordinates.get('label') @@ -241,31 +203,24 @@ def response(resp): latitude = data_value.get('latitude') longitude = data_value.get('longitude') url = get_earth_coordinates_url(latitude, longitude, osm_zoom) - urls.append({ - 'title': 'OpenStreetMap', - 'url': url, - 'entity': 'P625' - }) + urls.append({'title': 'OpenStreetMap', 'url': url, 'entity': 'P625'}) if len(heading) > 0: # TODO get infobox.meta.value where .label='article_title' # pylint: disable=fixme - if image is None and len(attributes) == 0 and len(urls) == 1 and\ - len(relatedTopics) == 0 and len(content) == 0: - results.append({ - 'url': urls[0]['url'], - 'title': heading, - 'content': content - }) + if image is None and len(attributes) == 0 and len(urls) == 1 and len(relatedTopics) == 0 and len(content) == 0: + results.append({'url': urls[0]['url'], 'title': heading, 'content': content}) else: - results.append({ - 'infobox': heading, - 'id': infobox_id, - 'content': content, - 'img_src': image, - 'attributes': attributes, - 'urls': urls, - 'relatedTopics': relatedTopics - }) + results.append( + { + 'infobox': heading, + 'id': infobox_id, + 'content': content, + 'img_src': image, + 'attributes': attributes, + 'urls': urls, + 'relatedTopics': relatedTopics, + } + ) return results @@ -273,7 +228,7 @@ def response(resp): def unit_to_str(unit): for prefix in WIKIDATA_PREFIX: if unit.startswith(prefix): - wikidata_entity = unit[len(prefix):] + wikidata_entity = unit[len(prefix) :] return WIKIDATA_UNITS.get(wikidata_entity, unit) return unit diff --git a/searx/engines/duckduckgo_images.py b/searx/engines/duckduckgo_images.py index 0daaf41e9..2f75e16f1 100644 --- a/searx/engines/duckduckgo_images.py +++ b/searx/engines/duckduckgo_images.py @@ -7,7 +7,10 @@ from json import loads from urllib.parse import urlencode from searx.exceptions import SearxEngineAPIException from searx.engines.duckduckgo import get_region_code -from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +from searx.engines.duckduckgo import ( # pylint: disable=unused-import + _fetch_supported_languages, + supported_languages_url, +) from searx.network import get # about @@ -41,8 +44,8 @@ def get_vqd(query, headers): content = res.text if content.find('vqd=\'') == -1: raise SearxEngineAPIException('Request failed') - vqd = content[content.find('vqd=\'') + 5:] - vqd = vqd[:vqd.find('\'')] + vqd = content[content.find('vqd=\'') + 5 :] + vqd = vqd[: vqd.find('\'')] return vqd @@ -61,10 +64,10 @@ def request(query, params): region_code = get_region_code(params['language'], lang_list=supported_languages) if region_code: params['url'] = images_url.format( - query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd) + query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd + ) else: - params['url'] = images_url.format( - query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd) + params['url'] = images_url.format(query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd) return params @@ -84,11 +87,15 @@ def response(resp): image = result['image'] # append result - results.append({'template': 'images.html', - 'title': title, - 'content': '', - 'thumbnail_src': thumbnail, - 'img_src': image, - 'url': url}) + results.append( + { + 'template': 'images.html', + 'title': title, + 'content': '', + 'thumbnail_src': thumbnail, + 'img_src': image, + 'url': url, + } + ) return results diff --git a/searx/engines/duden.py b/searx/engines/duden.py index bc4211c67..600b61f3c 100644 --- a/searx/engines/duden.py +++ b/searx/engines/duden.py @@ -38,7 +38,7 @@ def request(query, params): pageno : 1 # number of the requested page ''' - offset = (params['pageno'] - 1) + offset = params['pageno'] - 1 if offset == 0: search_url_fmt = base_url + 'suchen/dudenonline/{query}' params['url'] = search_url_fmt.format(query=quote(query)) @@ -58,9 +58,9 @@ def response(resp): dom = html.fromstring(resp.text) - number_of_results_element =\ - eval_xpath_getindex(dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()', - 0, default=None) + number_of_results_element = eval_xpath_getindex( + dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()', 0, default=None + ) if number_of_results_element is not None: number_of_results_string = re.sub('[^0-9]', '', number_of_results_element) results.append({'number_of_results': int(number_of_results_string)}) @@ -71,8 +71,6 @@ def response(resp): title = eval_xpath(result, 'string(.//h2/a)').strip() content = extract_text(eval_xpath(result, './/p')) # append result - results.append({'url': url, - 'title': title, - 'content': content}) + results.append({'url': url, 'title': title, 'content': content}) return results diff --git a/searx/engines/dummy-offline.py b/searx/engines/dummy-offline.py index cf2f75312..632eeb2b3 100644 --- a/searx/engines/dummy-offline.py +++ b/searx/engines/dummy-offline.py @@ -15,6 +15,8 @@ about = { def search(query, request_params): - return [{ - 'result': 'this is what you get', - }] + return [ + { + 'result': 'this is what you get', + } + ] diff --git a/searx/engines/ebay.py b/searx/engines/ebay.py index 45c633b42..b7aefcb44 100644 --- a/searx/engines/ebay.py +++ b/searx/engines/ebay.py @@ -58,16 +58,17 @@ def response(resp): if title == "": continue - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'price': price, - 'shipping': shipping, - 'source_country': source_country, - 'thumbnail': thumbnail, - 'template': 'products.html', - - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'price': price, + 'shipping': shipping, + 'source_country': source_country, + 'thumbnail': thumbnail, + 'template': 'products.html', + } + ) return results diff --git a/searx/engines/elasticsearch.py b/searx/engines/elasticsearch.py index db84a5c13..f6e207b4d 100644 --- a/searx/engines/elasticsearch.py +++ b/searx/engines/elasticsearch.py @@ -119,9 +119,7 @@ def response(resp): r['template'] = 'key-value.html' if show_metadata: - r['metadata'] = {'index': result['_index'], - 'id': result['_id'], - 'score': result['_score']} + r['metadata'] = {'index': result['_index'], 'id': result['_id'], 'score': result['_score']} results.append(r) @@ -133,12 +131,10 @@ _available_query_types = { # https://www.elastic.co/guide/en/elasticsearch/reference/current/full-text-queries.html 'match': _match_query, 'simple_query_string': _simple_query_string_query, - # Term-level queries # https://www.elastic.co/guide/en/elasticsearch/reference/current/term-level-queries.html 'term': _term_query, 'terms': _terms_query, - # Query JSON defined by the instance administrator. 'custom': _custom_query, } diff --git a/searx/engines/etools.py b/searx/engines/etools.py index bf4f4ea1f..347463291 100644 --- a/searx/engines/etools.py +++ b/searx/engines/etools.py @@ -22,10 +22,14 @@ paging = False safesearch = True base_url = 'https://www.etools.ch' -search_path = '/searchAdvancedSubmit.do'\ - '?query={search_term}'\ - '&pageResults=20'\ +search_path = ( + # fmt: off + '/searchAdvancedSubmit.do' + '?query={search_term}' + '&pageResults=20' '&safeSearch={safesearch}' + # fmt: on +) def request(query, params): @@ -49,8 +53,6 @@ def response(resp): title = extract_text(eval_xpath(result, './a//text()')) content = extract_text(eval_xpath(result, './/div[@class="text"]//text()')) - results.append({'url': url, - 'title': title, - 'content': content}) + results.append({'url': url, 'title': title, 'content': content}) return results diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py index 8fff2e384..c381b25d4 100644 --- a/searx/engines/fdroid.py +++ b/searx/engines/fdroid.py @@ -42,13 +42,13 @@ def response(resp): for app in dom.xpath('//a[@class="package-header"]'): app_url = app.xpath('./@href')[0] app_title = extract_text(app.xpath('./div/h4[@class="package-name"]/text()')) - app_content = extract_text(app.xpath('./div/div/span[@class="package-summary"]')).strip() \ - + ' - ' + extract_text(app.xpath('./div/div/span[@class="package-license"]')).strip() + app_content = ( + extract_text(app.xpath('./div/div/span[@class="package-summary"]')).strip() + + ' - ' + + extract_text(app.xpath('./div/div/span[@class="package-license"]')).strip() + ) app_img_src = app.xpath('./img[@class="package-icon"]/@src')[0] - results.append({'url': app_url, - 'title': app_title, - 'content': app_content, - 'img_src': app_img_src}) + results.append({'url': app_url, 'title': app_title, 'content': app_content, 'img_src': app_img_src}) return results diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index b0ddf6224..b7cd76808 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -25,10 +25,12 @@ paging = True api_key = None -url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\ - '&api_key={api_key}&{text}&sort=relevance' +\ - '&extras=description%2C+owner_name%2C+url_o%2C+url_n%2C+url_z' +\ - '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' +url = ( + 'https://api.flickr.com/services/rest/?method=flickr.photos.search' + + '&api_key={api_key}&{text}&sort=relevance' + + '&extras=description%2C+owner_name%2C+url_o%2C+url_n%2C+url_z' + + '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' +) photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' paging = True @@ -39,10 +41,9 @@ def build_flickr_url(user_id, photo_id): def request(query, params): - params['url'] = url.format(text=urlencode({'text': query}), - api_key=api_key, - nb_per_page=nb_per_page, - page=params['pageno']) + params['url'] = url.format( + text=urlencode({'text': query}), api_key=api_key, nb_per_page=nb_per_page, page=params['pageno'] + ) return params @@ -69,7 +70,7 @@ def response(resp): else: continue -# For a bigger thumbnail, keep only the url_z, not the url_n + # For a bigger thumbnail, keep only the url_z, not the url_n if 'url_n' in photo: thumbnail_src = photo['url_n'] elif 'url_z' in photo: @@ -80,13 +81,17 @@ def response(resp): url = build_flickr_url(photo['owner'], photo['id']) # append result - results.append({'url': url, - 'title': photo['title'], - 'img_src': img_src, - 'thumbnail_src': thumbnail_src, - 'content': photo['description']['_content'], - 'author': photo['ownername'], - 'template': 'images.html'}) + results.append( + { + 'url': url, + 'title': photo['title'], + 'img_src': img_src, + 'thumbnail_src': thumbnail_src, + 'content': photo['description']['_content'], + 'author': photo['ownername'], + 'template': 'images.html', + } + ) # return results return results diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py index 1d670ee50..4ff59fc52 100644 --- a/searx/engines/flickr_noapi.py +++ b/searx/engines/flickr_noapi.py @@ -30,10 +30,12 @@ image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's') paging = True time_range_support = True -time_range_dict = {'day': 60 * 60 * 24, - 'week': 60 * 60 * 24 * 7, - 'month': 60 * 60 * 24 * 7 * 4, - 'year': 60 * 60 * 24 * 7 * 52} +time_range_dict = { + 'day': 60 * 60 * 24, + 'week': 60 * 60 * 24 * 7, + 'month': 60 * 60 * 24 * 7 * 4, + 'year': 60 * 60 * 24 * 7 * 52, +} def build_flickr_url(user_id, photo_id): @@ -47,8 +49,9 @@ def _get_time_range_url(time_range): def request(query, params): - params['url'] = (search_url.format(query=urlencode({'text': query}), page=params['pageno']) - + _get_time_range_url(params['time_range'])) + params['url'] = search_url.format(query=urlencode({'text': query}), page=params['pageno']) + _get_time_range_url( + params['time_range'] + ) return params @@ -83,10 +86,9 @@ def response(resp): for image_size in image_sizes: if image_size in photo['sizes']: img_src = photo['sizes'][image_size]['url'] - img_format = 'jpg ' \ - + str(photo['sizes'][image_size]['width']) \ - + 'x' \ - + str(photo['sizes'][image_size]['height']) + img_format = ( + 'jpg ' + str(photo['sizes'][image_size]['width']) + 'x' + str(photo['sizes'][image_size]['height']) + ) break if not img_src: @@ -113,7 +115,7 @@ def response(resp): 'thumbnail_src': thumbnail_src, 'source': source, 'img_format': img_format, - 'template': 'images.html' + 'template': 'images.html', } result['author'] = author.encode(errors='ignore').decode() result['source'] = source.encode(errors='ignore').decode() diff --git a/searx/engines/framalibre.py b/searx/engines/framalibre.py index 42c08cf95..b2c9d9077 100644 --- a/searx/engines/framalibre.py +++ b/searx/engines/framalibre.py @@ -35,9 +35,8 @@ content_xpath = './/div[@class="content"]//p' # do search-request def request(query, params): - offset = (params['pageno'] - 1) - params['url'] = search_url.format(query=urlencode({'keys': query}), - offset=offset) + offset = params['pageno'] - 1 + params['url'] = search_url.format(query=urlencode({'keys': query}), offset=offset) return params @@ -63,10 +62,7 @@ def response(resp): content = escape(extract_text(result.xpath(content_xpath))) # append result - results.append({'url': href, - 'title': title, - 'img_src': thumbnail, - 'content': content}) + results.append({'url': href, 'title': title, 'img_src': thumbnail, 'content': content}) # return results return results diff --git a/searx/engines/freesound.py b/searx/engines/freesound.py index d2564946c..121a6a5b0 100644 --- a/searx/engines/freesound.py +++ b/searx/engines/freesound.py @@ -26,8 +26,7 @@ paging = True # search url url = "https://freesound.org/apiv2/" search_url = ( - url - + "search/text/?query={query}&page={page}&fields=name,url,download,created,description,type&token={api_key}" + url + "search/text/?query={query}&page={page}&fields=name,url,download,created,description,type&token={api_key}" ) embedded_url = '' diff --git a/searx/engines/frinkiac.py b/searx/engines/frinkiac.py index f43bb6e20..95a1366de 100644 --- a/searx/engines/frinkiac.py +++ b/searx/engines/frinkiac.py @@ -10,10 +10,7 @@ from urllib.parse import urlencode about = { "website": 'https://frinkiac.com', "wikidata_id": 'Q24882614', - "official_api_documentation": { - 'url': None, - 'comment': 'see https://github.com/MitchellAW/CompuGlobal' - }, + "official_api_documentation": {'url': None, 'comment': 'see https://github.com/MitchellAW/CompuGlobal'}, "use_official_api": False, "require_api_key": False, "results": 'JSON', @@ -40,12 +37,15 @@ def response(resp): episode = result['Episode'] timestamp = result['Timestamp'] - results.append({'template': 'images.html', - 'url': RESULT_URL.format(base=BASE, - query=urlencode({'p': 'caption', 'e': episode, 't': timestamp})), - 'title': episode, - 'content': '', - 'thumbnail_src': THUMB_URL.format(base=BASE, episode=episode, timestamp=timestamp), - 'img_src': IMAGE_URL.format(base=BASE, episode=episode, timestamp=timestamp)}) + results.append( + { + 'template': 'images.html', + 'url': RESULT_URL.format(base=BASE, query=urlencode({'p': 'caption', 'e': episode, 't': timestamp})), + 'title': episode, + 'content': '', + 'thumbnail_src': THUMB_URL.format(base=BASE, episode=episode, timestamp=timestamp), + 'img_src': IMAGE_URL.format(base=BASE, episode=episode, timestamp=timestamp), + } + ) return results diff --git a/searx/engines/gentoo.py b/searx/engines/gentoo.py index 325e132a6..5b9edafe0 100644 --- a/searx/engines/gentoo.py +++ b/searx/engines/gentoo.py @@ -37,15 +37,12 @@ def locale_to_lang_code(locale): # wikis for some languages were moved off from the main site, we need to make # requests to correct URLs to be able to get results in those languages lang_urls = { - 'en': { - 'base': 'https://wiki.gentoo.org', - 'search': '/index.php?title=Special:Search&offset={offset}&{query}' - }, + 'en': {'base': 'https://wiki.gentoo.org', 'search': '/index.php?title=Special:Search&offset={offset}&{query}'}, 'others': { 'base': 'https://wiki.gentoo.org', 'search': '/index.php?title=Special:Search&offset={offset}&{query}\ - &profile=translation&languagefilter={language}' - } + &profile=translation&languagefilter={language}', + }, } @@ -78,7 +75,7 @@ main_langs = { 'sl': 'Slovenský', 'th': 'ไทย', 'uk': 'Українська', - 'zh': '简体中文' + 'zh': '简体中文', } supported_languages = dict(lang_urls, **main_langs) @@ -101,8 +98,7 @@ def request(query, params): urls = get_lang_urls(language) search_url = urls['base'] + urls['search'] - params['url'] = search_url.format(query=query, offset=offset, - language=language) + params['url'] = search_url.format(query=query, offset=offset, language=language) return params @@ -123,7 +119,6 @@ def response(resp): href = urljoin(base_url, link.attrib.get('href')) title = extract_text(link) - results.append({'url': href, - 'title': title}) + results.append({'url': href, 'title': title}) return results diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index 0f685abc5..c657dca30 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -55,12 +55,12 @@ def fetch_extra_param(query_args, headers): extra_param_path = search_path + urlencode(query_args) text = get(base_url + extra_param_path, headers=headers).text - re_var= None + re_var = None for line in text.splitlines(): if re_var is None and extra_param_path in line: var = line.split("=")[0].split()[1] # e.g. var --> 'uxrl' re_var = re.compile(var + "\\s*=\\s*" + var + "\\s*\\+\\s*'" + "(.*)" + "'(.*)") - extra_param = line.split("'")[1][len(extra_param_path):] + extra_param = line.split("'")[1][len(extra_param_path) :] continue if re_var is not None and re_var.search(line): extra_param += re_var.search(line).group(1) @@ -69,12 +69,7 @@ def fetch_extra_param(query_args, headers): # do search-request def request(query, params): # pylint: disable=unused-argument - query_args = dict( - c = 'main' - , q = query - , dr = 1 - , showgoodimages = 0 - ) + query_args = dict(c='main', q=query, dr=1, showgoodimages=0) if params['language'] and params['language'] != 'all': query_args['qlangcountry'] = params['language'] @@ -93,6 +88,7 @@ def request(query, params): # pylint: disable=unused-argument return params + # get response from search-request def response(resp): results = [] @@ -125,10 +121,6 @@ def response(resp): if len(subtitle) > 3 and subtitle != title: title += " - " + subtitle - results.append(dict( - url = url - , title = title - , content = content - )) + results.append(dict(url=url, title=title, content=content)) return results diff --git a/searx/engines/github.py b/searx/engines/github.py index b68caa350..1d12d296a 100644 --- a/searx/engines/github.py +++ b/searx/engines/github.py @@ -55,9 +55,7 @@ def response(resp): content = '' # append result - results.append({'url': url, - 'title': title, - 'content': content}) + results.append({'url': url, 'title': title, 'content': content}) # return results return results diff --git a/searx/engines/google.py b/searx/engines/google.py index 578dec60c..685697d29 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -50,72 +50,63 @@ supported_languages_url = 'https://www.google.com/preferences?#languages' # based on https://en.wikipedia.org/wiki/List_of_Google_domains and tests google_domains = { - 'BG': 'google.bg', # Bulgaria - 'CZ': 'google.cz', # Czech Republic - 'DE': 'google.de', # Germany - 'DK': 'google.dk', # Denmark - 'AT': 'google.at', # Austria - 'CH': 'google.ch', # Switzerland - 'GR': 'google.gr', # Greece + 'BG': 'google.bg', # Bulgaria + 'CZ': 'google.cz', # Czech Republic + 'DE': 'google.de', # Germany + 'DK': 'google.dk', # Denmark + 'AT': 'google.at', # Austria + 'CH': 'google.ch', # Switzerland + 'GR': 'google.gr', # Greece 'AU': 'google.com.au', # Australia - 'CA': 'google.ca', # Canada - 'GB': 'google.co.uk', # United Kingdom - 'ID': 'google.co.id', # Indonesia - 'IE': 'google.ie', # Ireland - 'IN': 'google.co.in', # India + 'CA': 'google.ca', # Canada + 'GB': 'google.co.uk', # United Kingdom + 'ID': 'google.co.id', # Indonesia + 'IE': 'google.ie', # Ireland + 'IN': 'google.co.in', # India 'MY': 'google.com.my', # Malaysia - 'NZ': 'google.co.nz', # New Zealand + 'NZ': 'google.co.nz', # New Zealand 'PH': 'google.com.ph', # Philippines 'SG': 'google.com.sg', # Singapore - 'US': 'google.com', # United States (google.us) redirects to .com - 'ZA': 'google.co.za', # South Africa + 'US': 'google.com', # United States (google.us) redirects to .com + 'ZA': 'google.co.za', # South Africa 'AR': 'google.com.ar', # Argentina - 'CL': 'google.cl', # Chile - 'ES': 'google.es', # Spain + 'CL': 'google.cl', # Chile + 'ES': 'google.es', # Spain 'MX': 'google.com.mx', # Mexico - 'EE': 'google.ee', # Estonia - 'FI': 'google.fi', # Finland - 'BE': 'google.be', # Belgium - 'FR': 'google.fr', # France - 'IL': 'google.co.il', # Israel - 'HR': 'google.hr', # Croatia - 'HU': 'google.hu', # Hungary - 'IT': 'google.it', # Italy - 'JP': 'google.co.jp', # Japan - 'KR': 'google.co.kr', # South Korea - 'LT': 'google.lt', # Lithuania - 'LV': 'google.lv', # Latvia - 'NO': 'google.no', # Norway - 'NL': 'google.nl', # Netherlands - 'PL': 'google.pl', # Poland + 'EE': 'google.ee', # Estonia + 'FI': 'google.fi', # Finland + 'BE': 'google.be', # Belgium + 'FR': 'google.fr', # France + 'IL': 'google.co.il', # Israel + 'HR': 'google.hr', # Croatia + 'HU': 'google.hu', # Hungary + 'IT': 'google.it', # Italy + 'JP': 'google.co.jp', # Japan + 'KR': 'google.co.kr', # South Korea + 'LT': 'google.lt', # Lithuania + 'LV': 'google.lv', # Latvia + 'NO': 'google.no', # Norway + 'NL': 'google.nl', # Netherlands + 'PL': 'google.pl', # Poland 'BR': 'google.com.br', # Brazil - 'PT': 'google.pt', # Portugal - 'RO': 'google.ro', # Romania - 'RU': 'google.ru', # Russia - 'SK': 'google.sk', # Slovakia - 'SI': 'google.si', # Slovenia - 'SE': 'google.se', # Sweden - 'TH': 'google.co.th', # Thailand + 'PT': 'google.pt', # Portugal + 'RO': 'google.ro', # Romania + 'RU': 'google.ru', # Russia + 'SK': 'google.sk', # Slovakia + 'SI': 'google.si', # Slovenia + 'SE': 'google.se', # Sweden + 'TH': 'google.co.th', # Thailand 'TR': 'google.com.tr', # Turkey 'UA': 'google.com.ua', # Ukraine 'CN': 'google.com.hk', # There is no google.cn, we use .com.hk for zh-CN 'HK': 'google.com.hk', # Hong Kong - 'TW': 'google.com.tw' # Taiwan + 'TW': 'google.com.tw', # Taiwan } -time_range_dict = { - 'day': 'd', - 'week': 'w', - 'month': 'm', - 'year': 'y' -} +time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'} # Filter results. 0: None, 1: Moderate, 2: Strict -filter_mapping = { - 0: 'off', - 1: 'medium', - 2: 'high' -} +filter_mapping = {0: 'off', 1: 'medium', 2: 'high'} # specific xpath variables # ------------------------ @@ -140,6 +131,7 @@ content_xpath = './/div[@class="IsZvec"]' # from the links not the links itself. suggestion_xpath = '//div[contains(@class, "EIaa9b")]//a' + def get_lang_info(params, lang_list, custom_aliases, supported_any_language): """Composing various language properties for the google engines. @@ -184,11 +176,11 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): request's headers) """ ret_val = { - 'language' : None, - 'country' : None, - 'subdomain' : None, - 'params' : {}, - 'headers' : {}, + 'language': None, + 'country': None, + 'subdomain': None, + 'params': {}, + 'headers': {}, } # language ... @@ -213,7 +205,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): # subdomain ... - ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com') + ret_val['subdomain'] = 'www.' + google_domains.get(country.upper(), 'google.com') # params & headers @@ -250,15 +242,18 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): ret_val['params']['lr'] = "lang_" + lang_list.get(lang_country, language) # Accept-Language: fr-CH, fr;q=0.8, en;q=0.6, *;q=0.5 - ret_val['headers']['Accept-Language'] = ','.join([ - lang_country, - language + ';q=0.8,', - 'en;q=0.6', - '*;q=0.5', - ]) + ret_val['headers']['Accept-Language'] = ','.join( + [ + lang_country, + language + ';q=0.8,', + 'en;q=0.6', + '*;q=0.5', + ] + ) return ret_val + def detect_google_sorry(resp): if resp.url.host == 'sorry.google.com' or resp.url.path.startswith('/sorry'): raise SearxEngineCaptchaException() @@ -269,9 +264,7 @@ def request(query, params): offset = (params['pageno'] - 1) * 10 - lang_info = get_lang_info( - params, supported_languages, language_aliases, True - ) + lang_info = get_lang_info(params, supported_languages, language_aliases, True) additional_parameters = {} if use_mobile_ui: @@ -281,15 +274,23 @@ def request(query, params): } # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium - query_url = 'https://' + lang_info['subdomain'] + '/search' + "?" + urlencode({ - 'q': query, - **lang_info['params'], - 'ie': "utf8", - 'oe': "utf8", - 'start': offset, - 'filter': '0', - **additional_parameters, - }) + query_url = ( + 'https://' + + lang_info['subdomain'] + + '/search' + + "?" + + urlencode( + { + 'q': query, + **lang_info['params'], + 'ie': "utf8", + 'oe': "utf8", + 'start': offset, + 'filter': '0', + **additional_parameters, + } + ) + ) if params['time_range'] in time_range_dict: query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]}) @@ -301,9 +302,7 @@ def request(query, params): if use_mobile_ui: params['headers']['Accept'] = '*/*' else: - params['headers']['Accept'] = ( - 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' - ) + params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' return params @@ -325,7 +324,7 @@ def response(resp): else: logger.debug("did not find 'answer'") - # results --> number_of_results + # results --> number_of_results if not use_mobile_ui: try: _txt = eval_xpath_getindex(dom, '//div[@id="result-stats"]//text()', 0) @@ -355,11 +354,7 @@ def response(resp): if url is None: continue content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True) - results.append({ - 'url': url, - 'title': title, - 'content': content - }) + results.append({'url': url, 'title': title, 'content': content}) except Exception as e: # pylint: disable=broad-except logger.error(e, exc_info=True) # from lxml import etree diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index 61d291e3f..203df404a 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -30,10 +30,8 @@ from searx.engines.google import ( ) # pylint: disable=unused-import -from searx.engines.google import ( - supported_languages_url - , _fetch_supported_languages -) +from searx.engines.google import supported_languages_url, _fetch_supported_languages + # pylint: enable=unused-import # about @@ -53,21 +51,16 @@ use_locale_domain = True time_range_support = True safesearch = True -filter_mapping = { - 0: 'images', - 1: 'active', - 2: 'active' -} +filter_mapping = {0: 'images', 1: 'active', 2: 'active'} def scrap_out_thumbs(dom): - """Scrap out thumbnail data from ')] + response_text = response_text[response_text.find('INITIAL_PROPS') :] + response_text = response_text[response_text.find('{') : response_text.find('')] regions_json = loads(response_text) diff --git a/searx/engines/recoll.py b/searx/engines/recoll.py index 42f2858d7..ebcd83b8d 100644 --- a/searx/engines/recoll.py +++ b/searx/engines/recoll.py @@ -28,18 +28,12 @@ mount_prefix = None dl_prefix = None # embedded -embedded_url = '<{ttype} controls height="166px" ' +\ - 'src="{url}" type="{mtype}">' +embedded_url = '<{ttype} controls height="166px" ' + 'src="{url}" type="{mtype}">' # helper functions def get_time_range(time_range): - sw = { - 'day': 1, - 'week': 7, - 'month': 30, - 'year': 365 - } + sw = {'day': 1, 'week': 7, 'month': 30, 'year': 365} offset = sw.get(time_range, 0) if not offset: @@ -52,11 +46,9 @@ def get_time_range(time_range): def request(query, params): search_after = get_time_range(params['time_range']) search_url = base_url + 'json?{query}&highlight=0' - params['url'] = search_url.format(query=urlencode({ - 'query': query, - 'page': params['pageno'], - 'after': search_after, - 'dir': search_dir})) + params['url'] = search_url.format( + query=urlencode({'query': query, 'page': params['pageno'], 'after': search_after, 'dir': search_dir}) + ) return params @@ -76,10 +68,7 @@ def response(resp): content = '{}'.format(result['snippet']) # append result - item = {'url': url, - 'title': title, - 'content': content, - 'template': 'files.html'} + item = {'url': url, 'title': title, 'content': content, 'template': 'files.html'} if result['size']: item['size'] = int(result['size']) @@ -96,9 +85,8 @@ def response(resp): if mtype in ['audio', 'video']: item['embedded'] = embedded_url.format( - ttype=mtype, - url=quote(url.encode('utf8'), '/:'), - mtype=result['mtype']) + ttype=mtype, url=quote(url.encode('utf8'), '/:'), mtype=result['mtype'] + ) if mtype in ['image'] and subtype in ['bmp', 'gif', 'jpeg', 'png']: item['img_src'] = url diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py index ca6cb28a8..36d92339d 100644 --- a/searx/engines/reddit.py +++ b/searx/engines/reddit.py @@ -52,10 +52,7 @@ def response(resp): data = post['data'] # extract post information - params = { - 'url': urljoin(base_url, data['permalink']), - 'title': data['title'] - } + params = {'url': urljoin(base_url, data['permalink']), 'title': data['title']} # if thumbnail field contains a valid URL, we need to change template thumbnail = data['thumbnail'] diff --git a/searx/engines/redis_server.py b/searx/engines/redis_server.py index f9726033d..03786f81d 100644 --- a/searx/engines/redis_server.py +++ b/searx/engines/redis_server.py @@ -20,16 +20,19 @@ result_template = 'key-value.html' exact_match_only = True _redis_client = None + + def init(_engine_settings): global _redis_client # pylint: disable=global-statement _redis_client = redis.StrictRedis( - host = host, - port = port, - db = db, - password = password or None, - decode_responses = True, + host=host, + port=port, + db=db, + password=password or None, + decode_responses=True, ) + def search(query, _params): if not exact_match_only: return search_keys(query) @@ -42,21 +45,20 @@ def search(query, _params): if ' ' in query: qset, rest = query.split(' ', 1) ret = [] - for res in _redis_client.hscan_iter( - qset, match='*{}*'.format(rest) - ): - ret.append({ - res[0]: res[1], - 'template': result_template, - }) + for res in _redis_client.hscan_iter(qset, match='*{}*'.format(rest)): + ret.append( + { + res[0]: res[1], + 'template': result_template, + } + ) return ret return [] + def search_keys(query): ret = [] - for key in _redis_client.scan_iter( - match='*{}*'.format(query) - ): + for key in _redis_client.scan_iter(match='*{}*'.format(query)): key_type = _redis_client.type(key) res = None diff --git a/searx/engines/rumble.py b/searx/engines/rumble.py index 407142467..beca2570c 100644 --- a/searx/engines/rumble.py +++ b/searx/engines/rumble.py @@ -68,14 +68,16 @@ def response(resp): else: content = f"{views} views - {rumbles} rumbles" - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'author': author, - 'length': length, - 'template': 'videos.html', - 'publishedDate': fixed_date, - 'thumbnail': thumbnail, - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'author': author, + 'length': length, + 'template': 'videos.html', + 'publishedDate': fixed_date, + 'thumbnail': thumbnail, + } + ) return results diff --git a/searx/engines/scanr_structures.py b/searx/engines/scanr_structures.py index 51c925247..ad27079dd 100644 --- a/searx/engines/scanr_structures.py +++ b/searx/engines/scanr_structures.py @@ -32,12 +32,16 @@ def request(query, params): params['url'] = search_url params['method'] = 'POST' params['headers']['Content-type'] = "application/json" - params['data'] = dumps({"query": query, - "searchField": "ALL", - "sortDirection": "ASC", - "sortOrder": "RELEVANCY", - "page": params['pageno'], - "pageSize": page_size}) + params['data'] = dumps( + { + "query": query, + "searchField": "ALL", + "sortDirection": "ASC", + "sortOrder": "RELEVANCY", + "page": params['pageno'], + "pageSize": page_size, + } + ) return params @@ -69,11 +73,15 @@ def response(resp): content = result['highlights'][0]['value'] # append result - results.append({'url': url + 'structure/' + result['id'], - 'title': result['label'], - # 'thumbnail': thumbnail, - 'img_src': thumbnail, - 'content': html_to_text(content)}) + results.append( + { + 'url': url + 'structure/' + result['id'], + 'title': result['label'], + # 'thumbnail': thumbnail, + 'img_src': thumbnail, + 'content': html_to_text(content), + } + ) # return results return results diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py index 8c1330d98..a4b0308f9 100644 --- a/searx/engines/searchcode_code.py +++ b/searx/engines/searchcode_code.py @@ -25,10 +25,7 @@ url = 'https://searchcode.com/' search_url = url + 'api/codesearch_I/?{query}&p={pageno}' # special code-endings which are not recognised by the file ending -code_endings = {'cs': 'c#', - 'h': 'c', - 'hpp': 'cpp', - 'cxx': 'cpp'} +code_endings = {'cs': 'c#', 'h': 'c', 'hpp': 'cpp', 'cxx': 'cpp'} # do search-request @@ -55,17 +52,21 @@ def response(resp): lines[int(line)] = code code_language = code_endings.get( - result['filename'].split('.')[-1].lower(), - result['filename'].split('.')[-1].lower()) + result['filename'].split('.')[-1].lower(), result['filename'].split('.')[-1].lower() + ) # append result - results.append({'url': href, - 'title': title, - 'content': '', - 'repository': repo, - 'codelines': sorted(lines.items()), - 'code_language': code_language, - 'template': 'code.html'}) + results.append( + { + 'url': href, + 'title': title, + 'content': '', + 'repository': repo, + 'codelines': sorted(lines.items()), + 'code_language': code_language, + 'template': 'code.html', + } + ) # return results return results diff --git a/searx/engines/searx_engine.py b/searx/engines/searx_engine.py index 98ef0fb79..3e9035d6f 100644 --- a/searx/engines/searx_engine.py +++ b/searx/engines/searx_engine.py @@ -37,7 +37,7 @@ def request(query, params): 'language': params['language'], 'time_range': params['time_range'], 'category': params['category'], - 'format': 'json' + 'format': 'json', } return params diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py index 297d0cf71..5d9d1a8e9 100644 --- a/searx/engines/semantic_scholar.py +++ b/searx/engines/semantic_scholar.py @@ -13,19 +13,21 @@ def request(query, params): params['url'] = search_url params['method'] = 'POST' params['headers']['content-type'] = 'application/json' - params['data'] = dumps({ - "queryString": query, - "page": params['pageno'], - "pageSize": 10, - "sort": "relevance", - "useFallbackRankerService": False, - "useFallbackSearchCluster": False, - "getQuerySuggestions": False, - "authors": [], - "coAuthors": [], - "venues": [], - "performTitleMatch": True, - }) + params['data'] = dumps( + { + "queryString": query, + "page": params['pageno'], + "pageSize": 10, + "sort": "relevance", + "useFallbackRankerService": False, + "useFallbackSearchCluster": False, + "getQuerySuggestions": False, + "authors": [], + "coAuthors": [], + "venues": [], + "performTitleMatch": True, + } + ) return params @@ -33,10 +35,12 @@ def response(resp): res = loads(resp.text) results = [] for result in res['results']: - results.append({ - 'url': result['primaryPaperLink']['url'], - 'title': result['title']['text'], - 'content': result['paperAbstractTruncated'] - }) + results.append( + { + 'url': result['primaryPaperLink']['url'], + 'title': result['title']['text'], + 'content': result['paperAbstractTruncated'], + } + ) return results diff --git a/searx/engines/sepiasearch.py b/searx/engines/sepiasearch.py index 8ccde404f..00b1b3672 100644 --- a/searx/engines/sepiasearch.py +++ b/searx/engines/sepiasearch.py @@ -23,23 +23,21 @@ paging = True time_range_support = True safesearch = True supported_languages = [ + # fmt: off 'en', 'fr', 'ja', 'eu', 'ca', 'cs', 'eo', 'el', 'de', 'it', 'nl', 'es', 'oc', 'gd', 'zh', 'pt', 'sv', 'pl', 'fi', 'ru' + # fmt: on ] base_url = 'https://sepiasearch.org/api/v1/search/videos' -safesearch_table = { - 0: 'both', - 1: 'false', - 2: 'false' -} +safesearch_table = {0: 'both', 1: 'false', 2: 'false'} time_range_table = { 'day': relativedelta.relativedelta(), 'week': relativedelta.relativedelta(weeks=-1), 'month': relativedelta.relativedelta(months=-1), - 'year': relativedelta.relativedelta(years=-1) + 'year': relativedelta.relativedelta(years=-1), } @@ -53,13 +51,19 @@ def minute_to_hm(minute): def request(query, params): - params['url'] = base_url + '?' + urlencode({ - 'search': query, - 'start': (params['pageno'] - 1) * 10, - 'count': 10, - 'sort': '-match', - 'nsfw': safesearch_table[params['safesearch']] - }) + params['url'] = ( + base_url + + '?' + + urlencode( + { + 'search': query, + 'start': (params['pageno'] - 1) * 10, + 'count': 10, + 'sort': '-match', + 'nsfw': safesearch_table[params['safesearch']], + } + ) + ) language = params['language'].split('-')[0] if language in supported_languages: @@ -89,14 +93,18 @@ def response(resp): length = minute_to_hm(result.get('duration')) url = result['url'] - results.append({'url': url, - 'title': title, - 'content': content, - 'author': author, - 'length': length, - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'embedded': embedded, - 'thumbnail': thumbnail}) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'author': author, + 'length': length, + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) return results diff --git a/searx/engines/seznam.py b/searx/engines/seznam.py index 85cb25b7f..2e95b4769 100644 --- a/searx/engines/seznam.py +++ b/searx/engines/seznam.py @@ -58,10 +58,12 @@ def response(resp): if result_data is None: continue title_element = eval_xpath_getindex(result_element, './/h3/a', 0) - results.append({ - 'url': title_element.get('href'), - 'title': extract_text(title_element), - 'content': extract_text(eval_xpath(result_data, './/div[@class="_3eded7"]')), - }) + results.append( + { + 'url': title_element.get('href'), + 'title': extract_text(title_element), + 'content': extract_text(eval_xpath(result_data, './/div[@class="_3eded7"]')), + } + ) return results diff --git a/searx/engines/sjp.py b/searx/engines/sjp.py index 884fddd2d..ad498b847 100644 --- a/searx/engines/sjp.py +++ b/searx/engines/sjp.py @@ -28,9 +28,11 @@ URL = 'https://sjp.pwn.pl' SEARCH_URL = URL + '/szukaj/{query}.html' word_xpath = '//div[@class="query"]' -dict_xpath = ['//div[@class="wyniki sjp-so-wyniki sjp-so-anchor"]', - '//div[@class="wyniki sjp-wyniki sjp-anchor"]', - '//div[@class="wyniki sjp-doroszewski-wyniki sjp-doroszewski-anchor"]'] +dict_xpath = [ + '//div[@class="wyniki sjp-so-wyniki sjp-so-anchor"]', + '//div[@class="wyniki sjp-wyniki sjp-anchor"]', + '//div[@class="wyniki sjp-doroszewski-wyniki sjp-doroszewski-anchor"]', +] def request(query, params): @@ -85,9 +87,11 @@ def response(resp): infobox += "" infobox += "" - results.append({ - 'infobox': word, - 'content': infobox, - }) + results.append( + { + 'infobox': word, + 'content': infobox, + } + ) return results diff --git a/searx/engines/solidtorrents.py b/searx/engines/solidtorrents.py index 7fbef9190..614b38277 100644 --- a/searx/engines/solidtorrents.py +++ b/searx/engines/solidtorrents.py @@ -36,14 +36,16 @@ def response(resp): search_results = loads(resp.text) for result in search_results["results"]: - results.append({ - 'infohash': result["infohash"], - 'seed': result["swarm"]["seeders"], - 'leech': result["swarm"]["leechers"], - 'title': result["title"], - 'url': "https://solidtorrents.net/view/" + result["_id"], - 'filesize': result["size"], - 'magnetlink': result["magnet"], - 'template': "torrent.html", - }) + results.append( + { + 'infohash': result["infohash"], + 'seed': result["swarm"]["seeders"], + 'leech': result["swarm"]["leechers"], + 'title': result["title"], + 'url': "https://solidtorrents.net/view/" + result["_id"], + 'filesize': result["size"], + 'magnetlink': result["magnet"], + 'template': "torrent.html", + } + ) return results diff --git a/searx/engines/solr.py b/searx/engines/solr.py index e26f19442..3e7846f8e 100644 --- a/searx/engines/solr.py +++ b/searx/engines/solr.py @@ -14,10 +14,10 @@ from searx.exceptions import SearxEngineAPIException base_url = 'http://localhost:8983' collection = '' rows = 10 -sort = '' # sorting: asc or desc -field_list = 'name' # list of field names to display on the UI -default_fields = '' # default field to query -query_fields = '' # query fields +sort = '' # sorting: asc or desc +field_list = 'name' # list of field names to display on the UI +default_fields = '' # default field to query +query_fields = '' # query fields _search_url = '' paging = True diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index d5bfc0f6f..004164e37 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -27,17 +27,21 @@ paging = True # search-url # missing attribute: user_id, app_version, app_locale url = 'https://api-v2.soundcloud.com/' -search_url = url + 'search?{query}'\ - '&variant_ids='\ - '&facet=model'\ - '&limit=20'\ - '&offset={offset}'\ - '&linked_partitioning=1'\ - '&client_id={client_id}' # noqa +search_url = ( + url + 'search?{query}' + '&variant_ids=' + '&facet=model' + '&limit=20' + '&offset={offset}' + '&linked_partitioning=1' + '&client_id={client_id}' +) # noqa -embedded_url = '' +embedded_url = ( + '' +) cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U) guest_client_id = '' @@ -75,9 +79,7 @@ def init(engine_settings=None): def request(query, params): offset = (params['pageno'] - 1) * 20 - params['url'] = search_url.format(query=urlencode({'q': query}), - offset=offset, - client_id=guest_client_id) + params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset, client_id=guest_client_id) return params @@ -98,11 +100,15 @@ def response(resp): embedded = embedded_url.format(uri=uri) # append result - results.append({'url': result['permalink_url'], - 'title': title, - 'publishedDate': publishedDate, - 'embedded': embedded, - 'content': content}) + results.append( + { + 'url': result['permalink_url'], + 'title': title, + 'publishedDate': publishedDate, + 'embedded': embedded, + 'content': content, + } + ) # return results return results diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py index 6816fe672..15517e3eb 100644 --- a/searx/engines/spotify.py +++ b/searx/engines/spotify.py @@ -42,9 +42,10 @@ def request(query, params): r = http_post( 'https://accounts.spotify.com/api/token', data={'grant_type': 'client_credentials'}, - headers={'Authorization': 'Basic ' + base64.b64encode( - "{}:{}".format(api_client_id, api_client_secret).encode() - ).decode()} + headers={ + 'Authorization': 'Basic ' + + base64.b64encode("{}:{}".format(api_client_id, api_client_secret).encode()).decode() + }, ) j = loads(r.text) params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))} @@ -63,18 +64,12 @@ def response(resp): if result['type'] == 'track': title = result['name'] url = result['external_urls']['spotify'] - content = '{} - {} - {}'.format( - result['artists'][0]['name'], - result['album']['name'], - result['name']) + content = '{} - {} - {}'.format(result['artists'][0]['name'], result['album']['name'], result['name']) embedded = embedded_url.format(audioid=result['id']) # append result - results.append({'url': url, - 'title': title, - 'embedded': embedded, - 'content': content}) + results.append({'url': url, 'title': title, 'embedded': embedded, 'content': content}) # return results return results diff --git a/searx/engines/springer.py b/searx/engines/springer.py index 246e59b44..512d71e5e 100644 --- a/searx/engines/springer.py +++ b/searx/engines/springer.py @@ -26,15 +26,11 @@ api_key = 'unset' base_url = 'https://api.springernature.com/metadata/json?' + def request(query, params): if api_key == 'unset': raise SearxEngineAPIException('missing Springer-Nature API key') - args = urlencode({ - 'q' : query, - 's' : nb_per_page * (params['pageno'] - 1), - 'p' : nb_per_page, - 'api_key' : api_key - }) + args = urlencode({'q': query, 's': nb_per_page * (params['pageno'] - 1), 'p': nb_per_page, 'api_key': api_key}) params['url'] = base_url + args logger.debug("query_url --> %s", params['url']) return params @@ -50,21 +46,27 @@ def response(resp): content += "..." published = datetime.strptime(record['publicationDate'], '%Y-%m-%d') - metadata = [record[x] for x in [ - 'publicationName', - 'identifier', - 'contentType', - ] if record.get(x) is not None] + metadata = [ + record[x] + for x in [ + 'publicationName', + 'identifier', + 'contentType', + ] + if record.get(x) is not None + ] metadata = ' / '.join(metadata) if record.get('startingPage') and record.get('endingPage') is not None: metadata += " (%(startingPage)s-%(endingPage)s)" % record - results.append({ - 'title': record['title'], - 'url': record['url'][0]['value'].replace('http://', 'https://', 1), - 'content' : content, - 'publishedDate' : published, - 'metadata' : metadata - }) + results.append( + { + 'title': record['title'], + 'url': record['url'][0]['value'].replace('http://', 'https://', 1), + 'content': content, + 'publishedDate': published, + 'metadata': metadata, + } + ) return results diff --git a/searx/engines/sqlite.py b/searx/engines/sqlite.py index 43a85efbb..6de12f5fe 100644 --- a/searx/engines/sqlite.py +++ b/searx/engines/sqlite.py @@ -47,9 +47,9 @@ def search(query, params): query_params = { 'query': query, - 'wildcard': r'%' + query.replace(' ', r'%') + r'%', + 'wildcard': r'%' + query.replace(' ', r'%') + r'%', 'limit': limit, - 'offset': (params['pageno'] - 1) * limit + 'offset': (params['pageno'] - 1) * limit, } query_to_run = query_str + ' LIMIT :limit OFFSET :offset' @@ -59,7 +59,7 @@ def search(query, params): col_names = [cn[0] for cn in cur.description] for row in cur.fetchall(): - item = dict( zip(col_names, map(str, row)) ) + item = dict(zip(col_names, map(str, row))) item['template'] = result_template logger.debug("append result --> %s", item) results.append(item) diff --git a/searx/engines/stackexchange.py b/searx/engines/stackexchange.py index 34cba687c..99615b1a7 100644 --- a/searx/engines/stackexchange.py +++ b/searx/engines/stackexchange.py @@ -23,26 +23,30 @@ paging = True pagesize = 10 api_site = 'stackoverflow' -api_sort= 'activity' +api_sort = 'activity' api_order = 'desc' # https://api.stackexchange.com/docs/advanced-search search_api = 'https://api.stackexchange.com/2.3/search/advanced?' + def request(query, params): - args = urlencode({ - 'q' : query, - 'page' : params['pageno'], - 'pagesize' : pagesize, - 'site' : api_site, - 'sort' : api_sort, - 'order': 'desc', - }) + args = urlencode( + { + 'q': query, + 'page': params['pageno'], + 'pagesize': pagesize, + 'site': api_site, + 'sort': api_sort, + 'order': 'desc', + } + ) params['url'] = search_api + args return params + def response(resp): results = [] @@ -56,10 +60,12 @@ def response(resp): content += ' // is answered' content += " // score: %s" % result['score'] - results.append({ - 'url': "https://%s.com/q/%s" % (api_site, result['question_id']), - 'title': html.unescape(result['title']), - 'content': html.unescape(content), - }) + results.append( + { + 'url': "https://%s.com/q/%s" % (api_site, result['question_id']), + 'title': html.unescape(result['title']), + 'content': html.unescape(content), + } + ) return results diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index e71310be6..65d90debe 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -101,7 +101,7 @@ def response(resp): # check if search result starts with something like: "2 Sep 2014 ... " if re.match(r"^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content): date_pos = content.find('...') + 4 - date_string = content[0:date_pos - 5] + date_string = content[0 : date_pos - 5] # fix content string content = content[date_pos:] @@ -113,7 +113,7 @@ def response(resp): # check if search result starts with something like: "5 days ago ... " elif re.match(r"^[0-9]+ days? ago \.\.\. ", content): date_pos = content.find('...') + 4 - date_string = content[0:date_pos - 5] + date_string = content[0 : date_pos - 5] # calculate datetime published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group())) @@ -123,15 +123,10 @@ def response(resp): if published_date: # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'publishedDate': published_date}) + results.append({'url': url, 'title': title, 'content': content, 'publishedDate': published_date}) else: # append result - results.append({'url': url, - 'title': title, - 'content': content}) + results.append({'url': url, 'title': title, 'content': content}) # return results return results @@ -152,7 +147,7 @@ def _fetch_supported_languages(resp): 'malayam': 'ml', 'norsk': 'nb', 'sinhalese': 'si', - 'sudanese': 'su' + 'sudanese': 'su', } # get the English name of every language known by babel diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py index 0d62453a9..b01de38c1 100644 --- a/searx/engines/tokyotoshokan.py +++ b/searx/engines/tokyotoshokan.py @@ -56,11 +56,7 @@ def response(resp): name_row = rows[i] links = name_row.xpath('./td[@class="desc-top"]/a') - params = { - 'template': 'torrent.html', - 'url': links[-1].attrib.get('href'), - 'title': extract_text(links[-1]) - } + params = {'template': 'torrent.html', 'url': links[-1].attrib.get('href'), 'title': extract_text(links[-1])} # I have not yet seen any torrents without magnet links, but # it's better to be prepared to stumble upon one some day if len(links) == 2: diff --git a/searx/engines/torznab.py b/searx/engines/torznab.py index 960d1ee90..a48017c13 100644 --- a/searx/engines/torznab.py +++ b/searx/engines/torznab.py @@ -35,10 +35,12 @@ api_key = '' # https://newznab.readthedocs.io/en/latest/misc/api/#predefined-categories torznab_categories = [] -def init(engine_settings=None): # pylint: disable=unused-argument + +def init(engine_settings=None): # pylint: disable=unused-argument if len(base_url) < 1: raise ValueError('missing torznab base_url') + def request(query, params): search_url = base_url + '?t=search&q={search_query}' @@ -48,13 +50,12 @@ def request(query, params): search_url += '&cat={torznab_categories}' params['url'] = search_url.format( - search_query = quote(query), - api_key = api_key, - torznab_categories = ",".join([str(x) for x in torznab_categories]) + search_query=quote(query), api_key=api_key, torznab_categories=",".join([str(x) for x in torznab_categories]) ) return params + def response(resp): results = [] @@ -103,8 +104,7 @@ def response(resp): result["publishedDate"] = None try: - result["publishedDate"] = datetime.strptime( - get_property(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S %z') + result["publishedDate"] = datetime.strptime(get_property(item, 'pubDate'), '%a, %d %b %Y %H:%M:%S %z') except (ValueError, TypeError) as e: logger.debug("ignore exception (publishedDate): %s", e) @@ -134,9 +134,7 @@ def get_property(item, property_name): def get_torznab_attr(item, attr_name): element = item.find( './/torznab:attr[@name="{attr_name}"]'.format(attr_name=attr_name), - { - 'torznab': 'http://torznab.com/schemas/2015/feed' - } + {'torznab': 'http://torznab.com/schemas/2015/feed'}, ) if element is not None: diff --git a/searx/engines/translated.py b/searx/engines/translated.py index 8d67ca0bb..62ade49e2 100644 --- a/searx/engines/translated.py +++ b/searx/engines/translated.py @@ -28,24 +28,25 @@ def request(query, params): key_form = '&key=' + api_key else: key_form = '' - params['url'] = url.format(from_lang=params['from_lang'][1], - to_lang=params['to_lang'][1], - query=params['query'], - key=key_form) + params['url'] = url.format( + from_lang=params['from_lang'][1], to_lang=params['to_lang'][1], query=params['query'], key=key_form + ) return params def response(resp): results = [] - results.append({ - 'url': web_url.format( - from_lang=resp.search_params['from_lang'][2], - to_lang=resp.search_params['to_lang'][2], - query=resp.search_params['query']), - 'title': '[{0}-{1}] {2}'.format( - resp.search_params['from_lang'][1], - resp.search_params['to_lang'][1], - resp.search_params['query']), - 'content': resp.json()['responseData']['translatedText'] - }) + results.append( + { + 'url': web_url.format( + from_lang=resp.search_params['from_lang'][2], + to_lang=resp.search_params['to_lang'][2], + query=resp.search_params['query'], + ), + 'title': '[{0}-{1}] {2}'.format( + resp.search_params['from_lang'][1], resp.search_params['to_lang'][1], resp.search_params['query'] + ), + 'content': resp.json()['responseData']['translatedText'], + } + ) return results diff --git a/searx/engines/unsplash.py b/searx/engines/unsplash.py index 1445b4cec..1967fefd2 100644 --- a/searx/engines/unsplash.py +++ b/searx/engines/unsplash.py @@ -26,23 +26,13 @@ paging = True def clean_url(url): parsed = urlparse(url) - query = [(k, v) for (k, v) - in parse_qsl(parsed.query) if k not in ['ixid', 's']] + query = [(k, v) for (k, v) in parse_qsl(parsed.query) if k not in ['ixid', 's']] - return urlunparse(( - parsed.scheme, - parsed.netloc, - parsed.path, - parsed.params, - urlencode(query), - parsed.fragment - )) + return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, urlencode(query), parsed.fragment)) def request(query, params): - params['url'] = search_url + urlencode({ - 'query': query, 'page': params['pageno'], 'per_page': page_size - }) + params['url'] = search_url + urlencode({'query': query, 'page': params['pageno'], 'per_page': page_size}) logger.debug("query_url --> %s", params['url']) return params @@ -53,13 +43,15 @@ def response(resp): if 'results' in json_data: for result in json_data['results']: - results.append({ - 'template': 'images.html', - 'url': clean_url(result['links']['html']), - 'thumbnail_src': clean_url(result['urls']['thumb']), - 'img_src': clean_url(result['urls']['raw']), - 'title': result.get('alt_description') or 'unknown', - 'content': result.get('description') or '' - }) + results.append( + { + 'template': 'images.html', + 'url': clean_url(result['links']['html']), + 'thumbnail_src': clean_url(result['urls']['thumb']), + 'img_src': clean_url(result['urls']['raw']), + 'title': result.get('alt_description') or 'unknown', + 'content': result.get('description') or '', + } + ) return results diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index 824579256..52d201eac 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -25,15 +25,16 @@ paging = True base_url = 'https://vimeo.com/' search_url = base_url + '/search/page:{pageno}?{query}' -embedded_url = '' +embedded_url = ( + '' +) # do search-request def request(query, params): - params['url'] = search_url.format(pageno=params['pageno'], - query=urlencode({'q': query})) + params['url'] = search_url.format(pageno=params['pageno'], query=urlencode({'q': query})) return params @@ -56,13 +57,17 @@ def response(resp): embedded = embedded_url.format(videoid=videoid) # append result - results.append({'url': url, - 'title': title, - 'content': '', - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'embedded': embedded, - 'thumbnail': thumbnail}) + results.append( + { + 'url': url, + 'title': title, + 'content': '', + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) # return results return results diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index f0dfc7595..e5d3f55c0 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -14,7 +14,10 @@ from searx.data import WIKIDATA_UNITS from searx.network import post, get from searx.utils import match_language, searx_useragent, get_string_replaces_function from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom -from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import +from searx.engines.wikipedia import ( # pylint: disable=unused-import + _fetch_supported_languages, + supported_languages_url, +) # about about = { @@ -92,24 +95,27 @@ WHERE { # https://www.w3.org/TR/sparql11-query/#rSTRING_LITERAL1 # https://lists.w3.org/Archives/Public/public-rdf-dawg/2011OctDec/0175.html -sparql_string_escape = get_string_replaces_function({'\t': '\\\t', - '\n': '\\\n', - '\r': '\\\r', - '\b': '\\\b', - '\f': '\\\f', - '\"': '\\\"', - '\'': '\\\'', - '\\': '\\\\'}) +sparql_string_escape = get_string_replaces_function( + # fmt: off + { + '\t': '\\\t', + '\n': '\\\n', + '\r': '\\\r', + '\b': '\\\b', + '\f': '\\\f', + '\"': '\\\"', + '\'': '\\\'', + '\\': '\\\\' + } + # fmt: on +) replace_http_by_https = get_string_replaces_function({'http:': 'https:'}) def get_headers(): # user agent: https://www.mediawiki.org/wiki/Wikidata_Query_Service/User_Manual#Query_limits - return { - 'Accept': 'application/sparql-results+json', - 'User-Agent': searx_useragent() - } + return {'Accept': 'application/sparql-results+json', 'User-Agent': searx_useragent()} def get_label_for_entity(entity_id, language): @@ -205,9 +211,9 @@ def get_results(attribute_result, attributes, language): results.append({'title': infobox_title, 'url': url}) # update the infobox_id with the wikipedia URL # first the local wikipedia URL, and as fallback the english wikipedia URL - if attribute_type == WDArticle\ - and ((attribute.language == 'en' and infobox_id_lang is None) - or attribute.language != 'en'): + if attribute_type == WDArticle and ( + (attribute.language == 'en' and infobox_id_lang is None) or attribute.language != 'en' + ): infobox_id_lang = attribute.language infobox_id = url elif attribute_type == WDImageAttribute: @@ -226,13 +232,11 @@ def get_results(attribute_result, attributes, language): osm_zoom = area_to_osm_zoom(area) if area else 19 url = attribute.get_geo_url(attribute_result, osm_zoom=osm_zoom) if url: - infobox_urls.append({'title': attribute.get_label(language), - 'url': url, - 'entity': attribute.name}) + infobox_urls.append({'title': attribute.get_label(language), 'url': url, 'entity': attribute.name}) else: - infobox_attributes.append({'label': attribute.get_label(language), - 'value': value, - 'entity': attribute.name}) + infobox_attributes.append( + {'label': attribute.get_label(language), 'value': value, 'entity': attribute.name} + ) if infobox_id: infobox_id = replace_http_by_https(infobox_id) @@ -240,22 +244,19 @@ def get_results(attribute_result, attributes, language): # add the wikidata URL at the end infobox_urls.append({'title': 'Wikidata', 'url': attribute_result['item']}) - if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and\ - len(infobox_content) == 0: - results.append({ - 'url': infobox_urls[0]['url'], - 'title': infobox_title, - 'content': infobox_content - }) + if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and len(infobox_content) == 0: + results.append({'url': infobox_urls[0]['url'], 'title': infobox_title, 'content': infobox_content}) else: - results.append({ - 'infobox': infobox_title, - 'id': infobox_id, - 'content': infobox_content, - 'img_src': img_src, - 'urls': infobox_urls, - 'attributes': infobox_attributes - }) + results.append( + { + 'infobox': infobox_title, + 'id': infobox_id, + 'content': infobox_content, + 'img_src': img_src, + 'urls': infobox_urls, + 'attributes': infobox_attributes, + } + ) return results @@ -265,13 +266,14 @@ def get_query(query, language): where = list(filter(lambda s: len(s) > 0, [a.get_where() for a in attributes])) wikibase_label = list(filter(lambda s: len(s) > 0, [a.get_wikibase_label() for a in attributes])) group_by = list(filter(lambda s: len(s) > 0, [a.get_group_by() for a in attributes])) - query = QUERY_TEMPLATE\ - .replace('%QUERY%', sparql_string_escape(query))\ - .replace('%SELECT%', ' '.join(select))\ - .replace('%WHERE%', '\n '.join(where))\ - .replace('%WIKIBASE_LABELS%', '\n '.join(wikibase_label))\ - .replace('%GROUP_BY%', ' '.join(group_by))\ + query = ( + QUERY_TEMPLATE.replace('%QUERY%', sparql_string_escape(query)) + .replace('%SELECT%', ' '.join(select)) + .replace('%WHERE%', '\n '.join(where)) + .replace('%WIKIBASE_LABELS%', '\n '.join(wikibase_label)) + .replace('%GROUP_BY%', ' '.join(group_by)) .replace('%LANGUAGE%', language) + ) return query, attributes @@ -297,90 +299,98 @@ def get_attributes(language): attributes.append(WDDateAttribute(name)) # Dates - for p in ['P571', # inception date - 'P576', # dissolution date - 'P580', # start date - 'P582', # end date - 'P569', # date of birth - 'P570', # date of death - 'P619', # date of spacecraft launch - 'P620']: # date of spacecraft landing + for p in [ + 'P571', # inception date + 'P576', # dissolution date + 'P580', # start date + 'P582', # end date + 'P569', # date of birth + 'P570', # date of death + 'P619', # date of spacecraft launch + 'P620', + ]: # date of spacecraft landing add_date(p) - for p in ['P27', # country of citizenship - 'P495', # country of origin - 'P17', # country - 'P159']: # headquarters location + for p in [ + 'P27', # country of citizenship + 'P495', # country of origin + 'P17', # country + 'P159', + ]: # headquarters location add_label(p) # Places - for p in ['P36', # capital - 'P35', # head of state - 'P6', # head of government - 'P122', # basic form of government - 'P37']: # official language + for p in [ + 'P36', # capital + 'P35', # head of state + 'P6', # head of government + 'P122', # basic form of government + 'P37', + ]: # official language add_label(p) - add_value('P1082') # population + add_value('P1082') # population add_amount('P2046') # area - add_amount('P281') # postal code - add_label('P38') # currency + add_amount('P281') # postal code + add_label('P38') # currency add_amount('P2048') # heigth (building) # Media - for p in ['P400', # platform (videogames, computing) - 'P50', # author - 'P170', # creator - 'P57', # director - 'P175', # performer - 'P178', # developer - 'P162', # producer - 'P176', # manufacturer - 'P58', # screenwriter - 'P272', # production company - 'P264', # record label - 'P123', # publisher - 'P449', # original network - 'P750', # distributed by - 'P86']: # composer + for p in [ + 'P400', # platform (videogames, computing) + 'P50', # author + 'P170', # creator + 'P57', # director + 'P175', # performer + 'P178', # developer + 'P162', # producer + 'P176', # manufacturer + 'P58', # screenwriter + 'P272', # production company + 'P264', # record label + 'P123', # publisher + 'P449', # original network + 'P750', # distributed by + 'P86', + ]: # composer add_label(p) - add_date('P577') # publication date - add_label('P136') # genre (music, film, artistic...) - add_label('P364') # original language - add_value('P212') # ISBN-13 - add_value('P957') # ISBN-10 - add_label('P275') # copyright license - add_label('P277') # programming language - add_value('P348') # version - add_label('P840') # narrative location + add_date('P577') # publication date + add_label('P136') # genre (music, film, artistic...) + add_label('P364') # original language + add_value('P212') # ISBN-13 + add_value('P957') # ISBN-10 + add_label('P275') # copyright license + add_label('P277') # programming language + add_value('P348') # version + add_label('P840') # narrative location # Languages - add_value('P1098') # number of speakers - add_label('P282') # writing system - add_label('P1018') # language regulatory body - add_value('P218') # language code (ISO 639-1) + add_value('P1098') # number of speakers + add_label('P282') # writing system + add_label('P1018') # language regulatory body + add_value('P218') # language code (ISO 639-1) # Other - add_label('P169') # ceo - add_label('P112') # founded by - add_label('P1454') # legal form (company, organization) - add_label('P137') # operator (service, facility, ...) - add_label('P1029') # crew members (tripulation) - add_label('P225') # taxon name - add_value('P274') # chemical formula - add_label('P1346') # winner (sports, contests, ...) - add_value('P1120') # number of deaths - add_value('P498') # currency code (ISO 4217) + add_label('P169') # ceo + add_label('P112') # founded by + add_label('P1454') # legal form (company, organization) + add_label('P137') # operator (service, facility, ...) + add_label('P1029') # crew members (tripulation) + add_label('P225') # taxon name + add_value('P274') # chemical formula + add_label('P1346') # winner (sports, contests, ...) + add_value('P1120') # number of deaths + add_value('P498') # currency code (ISO 4217) # URL - add_url('P856', official=True) # official website + add_url('P856', official=True) # official website attributes.append(WDArticle(language)) # wikipedia (user language) if not language.startswith('en'): attributes.append(WDArticle('en')) # wikipedia (english) - add_url('P1324') # source code repository - add_url('P1581') # blog + add_url('P1324') # source code repository + add_url('P1581') # blog add_url('P434', url_id='musicbrainz_artist') add_url('P435', url_id='musicbrainz_work') add_url('P436', url_id='musicbrainz_release_group') @@ -396,11 +406,11 @@ def get_attributes(language): attributes.append(WDGeoAttribute('P625')) # Image - add_image('P15', priority=1, url_id='wikimedia_image') # route map - add_image('P242', priority=2, url_id='wikimedia_image') # locator map - add_image('P154', priority=3, url_id='wikimedia_image') # logo - add_image('P18', priority=4, url_id='wikimedia_image') # image - add_image('P41', priority=5, url_id='wikimedia_image') # flag + add_image('P15', priority=1, url_id='wikimedia_image') # route map + add_image('P242', priority=2, url_id='wikimedia_image') # locator map + add_image('P154', priority=3, url_id='wikimedia_image') # logo + add_image('P18', priority=4, url_id='wikimedia_image') # image + add_image('P41', priority=5, url_id='wikimedia_image') # flag add_image('P2716', priority=6, url_id='wikimedia_image') # collage add_image('P2910', priority=7, url_id='wikimedia_image') # icon @@ -409,7 +419,7 @@ def get_attributes(language): class WDAttribute: - __slots__ = 'name', + __slots__ = ('name',) def __init__(self, name): self.name = name @@ -437,14 +447,15 @@ class WDAttribute: class WDAmountAttribute(WDAttribute): - def get_select(self): return '?{name} ?{name}Unit'.replace('{name}', self.name) def get_where(self): return """ OPTIONAL { ?item p:{name} ?{name}Node . ?{name}Node rdf:type wikibase:BestRank ; ps:{name} ?{name} . - OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace('{name}', self.name) + OPTIONAL { ?{name}Node psv:{name}/wikibase:quantityUnit ?{name}Unit. } }""".replace( + '{name}', self.name + ) def get_group_by(self): return self.get_select() @@ -478,7 +489,9 @@ class WDArticle(WDAttribute): return """OPTIONAL { ?article{language} schema:about ?item ; schema:inLanguage "{language}" ; schema:isPartOf ; - schema:name ?articleName{language} . }""".replace('{language}', self.language) + schema:name ?articleName{language} . }""".replace( + '{language}', self.language + ) def get_group_by(self): return self.get_select() @@ -489,7 +502,6 @@ class WDArticle(WDAttribute): class WDLabelAttribute(WDAttribute): - def get_select(self): return '(group_concat(distinct ?{name}Label;separator=", ") as ?{name}Labels)'.replace('{name}', self.name) @@ -520,14 +532,13 @@ class WDURLAttribute(WDAttribute): value = value.split(',')[0] url_id = self.url_id if value.startswith(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE): - value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE):] + value = value[len(WDURLAttribute.HTTP_WIKIMEDIA_IMAGE) :] url_id = 'wikimedia_image' return get_external_url(url_id, value) return value class WDGeoAttribute(WDAttribute): - def get_label(self, language): return "OpenStreetMap" @@ -537,7 +548,9 @@ class WDGeoAttribute(WDAttribute): def get_where(self): return """OPTIONAL { ?item p:{name}/psv:{name} [ wikibase:geoLatitude ?{name}Lat ; - wikibase:geoLongitude ?{name}Long ] }""".replace('{name}', self.name) + wikibase:geoLongitude ?{name}Long ] }""".replace( + '{name}', self.name + ) def get_group_by(self): return self.get_select() @@ -559,7 +572,7 @@ class WDGeoAttribute(WDAttribute): class WDImageAttribute(WDURLAttribute): - __slots__ = 'priority', + __slots__ = ('priority',) def __init__(self, name, url_id=None, priority=100): super().__init__(name, url_id) @@ -567,7 +580,6 @@ class WDImageAttribute(WDURLAttribute): class WDDateAttribute(WDAttribute): - def get_select(self): return '?{name} ?{name}timePrecision ?{name}timeZone ?{name}timeCalendar'.replace('{name}', self.name) @@ -581,7 +593,9 @@ class WDDateAttribute(WDAttribute): wikibase:timePrecision ?{name}timePrecision ; wikibase:timeTimezone ?{name}timeZone ; wikibase:timeCalendarModel ?{name}timeCalendar ] . } - hint:Prior hint:rangeSafe true;""".replace('{name}', self.name) + hint:Prior hint:rangeSafe true;""".replace( + '{name}', self.name + ) def get_group_by(self): return self.get_select() @@ -613,11 +627,12 @@ class WDDateAttribute(WDAttribute): def format_13(self, value, locale): timestamp = isoparse(value) # precision: minute - return get_datetime_format(format, locale=locale) \ - .replace("'", "") \ - .replace('{0}', format_time(timestamp, 'full', tzinfo=None, - locale=locale)) \ + return ( + get_datetime_format(format, locale=locale) + .replace("'", "") + .replace('{0}', format_time(timestamp, 'full', tzinfo=None, locale=locale)) .replace('{1}', format_date(timestamp, 'short', locale=locale)) + ) def format_14(self, value, locale): # precision: second. @@ -638,7 +653,7 @@ class WDDateAttribute(WDAttribute): '11': ('format_11', 0), # day '12': ('format_13', 0), # hour (not supported by babel, display minute) '13': ('format_13', 0), # minute - '14': ('format_14', 0) # second + '14': ('format_14', 0), # second } def get_str(self, result, language): diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py index 5e34db9a7..cc806a8de 100644 --- a/searx/engines/wikipedia.py +++ b/searx/engines/wikipedia.py @@ -39,8 +39,7 @@ def request(query, params): query = query.title() language = url_lang(params['language']) - params['url'] = search_url.format(title=quote(query), - language=language) + params['url'] = search_url.format(title=quote(query), language=language) if params['language'].lower() in language_variants.get(language, []): params['headers']['Accept-Language'] = params['language'].lower() @@ -63,8 +62,10 @@ def response(resp): except: pass else: - if api_result['type'] == 'https://mediawiki.org/wiki/HyperSwitch/errors/bad_request' \ - and api_result['detail'] == 'title-invalid-characters': + if ( + api_result['type'] == 'https://mediawiki.org/wiki/HyperSwitch/errors/bad_request' + and api_result['detail'] == 'title-invalid-characters' + ): return [] raise_for_httperror(resp) @@ -81,11 +82,15 @@ def response(resp): results.append({'url': wikipedia_link, 'title': title}) - results.append({'infobox': title, - 'id': wikipedia_link, - 'content': api_result.get('extract', ''), - 'img_src': api_result.get('thumbnail', {}).get('source'), - 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]}) + results.append( + { + 'infobox': title, + 'id': wikipedia_link, + 'content': api_result.get('extract', ''), + 'img_src': api_result.get('thumbnail', {}).get('source'), + 'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}], + } + ) return results diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py index 9c84e2809..1c882c582 100644 --- a/searx/engines/wolframalpha_api.py +++ b/searx/engines/wolframalpha_api.py @@ -36,8 +36,7 @@ img_alt_xpath = './@alt' # pods to display as image in infobox # this pods do return a plaintext, but they look better and are more useful as images -image_pods = {'VisualRepresentation', - 'Illustration'} +image_pods = {'VisualRepresentation', 'Illustration'} # do search-request @@ -50,15 +49,17 @@ def request(query, params): # replace private user area characters to make text legible def replace_pua_chars(text): - pua_chars = {'\uf522': '\u2192', # rigth arrow - '\uf7b1': '\u2115', # set of natural numbers - '\uf7b4': '\u211a', # set of rational numbers - '\uf7b5': '\u211d', # set of real numbers - '\uf7bd': '\u2124', # set of integer numbers - '\uf74c': 'd', # differential - '\uf74d': '\u212f', # euler's number - '\uf74e': 'i', # imaginary number - '\uf7d9': '='} # equals sign + pua_chars = { + '\uf522': '\u2192', # rigth arrow + '\uf7b1': '\u2115', # set of natural numbers + '\uf7b4': '\u211a', # set of rational numbers + '\uf7b5': '\u211d', # set of real numbers + '\uf7bd': '\u2124', # set of integer numbers + '\uf74c': 'd', # differential + '\uf74d': '\u212f', # euler's number + '\uf74e': 'i', # imaginary number + '\uf7d9': '=', + } # equals sign for k, v in pua_chars.items(): text = text.replace(k, v) @@ -112,9 +113,12 @@ def response(resp): result_chunks.append({'label': pod_title, 'value': content}) elif image: - result_chunks.append({'label': pod_title, - 'image': {'src': image[0].xpath(img_src_xpath)[0], - 'alt': image[0].xpath(img_alt_xpath)[0]}}) + result_chunks.append( + { + 'label': pod_title, + 'image': {'src': image[0].xpath(img_src_xpath)[0], 'alt': image[0].xpath(img_alt_xpath)[0]}, + } + ) if not result_chunks: return [] @@ -122,13 +126,15 @@ def response(resp): title = "Wolfram|Alpha (%s)" % infobox_title # append infobox - results.append({'infobox': infobox_title, - 'attributes': result_chunks, - 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]}) + results.append( + { + 'infobox': infobox_title, + 'attributes': result_chunks, + 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}], + } + ) # append link to site - results.append({'url': resp.request.headers['Referer'], - 'title': title, - 'content': result_content}) + results.append({'url': resp.request.headers['Referer'], 'title': title, 'content': result_content}) return results diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py index 1f2cfa4e6..bad25602a 100644 --- a/searx/engines/wolframalpha_noapi.py +++ b/searx/engines/wolframalpha_noapi.py @@ -22,30 +22,29 @@ about = { # search-url url = 'https://www.wolframalpha.com/' -search_url = url + 'input/json.jsp'\ - '?async=false'\ - '&banners=raw'\ - '&debuggingdata=false'\ - '&format=image,plaintext,imagemap,minput,moutput'\ - '&formattimeout=2'\ - '&{query}'\ - '&output=JSON'\ - '&parsetimeout=2'\ - '&proxycode={token}'\ - '&scantimeout=0.5'\ - '&sponsorcategories=true'\ +search_url = ( + url + 'input/json.jsp' + '?async=false' + '&banners=raw' + '&debuggingdata=false' + '&format=image,plaintext,imagemap,minput,moutput' + '&formattimeout=2' + '&{query}' + '&output=JSON' + '&parsetimeout=2' + '&proxycode={token}' + '&scantimeout=0.5' + '&sponsorcategories=true' '&statemethod=deploybutton' +) referer_url = url + 'input/?{query}' -token = {'value': '', - 'last_updated': None} +token = {'value': '', 'last_updated': None} # pods to display as image in infobox # this pods do return a plaintext, but they look better and are more useful as images -image_pods = {'VisualRepresentation', - 'Illustration', - 'Symbol'} +image_pods = {'VisualRepresentation', 'Illustration', 'Symbol'} # seems, wolframalpha resets its token in every hour @@ -115,12 +114,20 @@ def response(resp): if not result_chunks: return [] - results.append({'infobox': infobox_title, - 'attributes': result_chunks, - 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]}) + results.append( + { + 'infobox': infobox_title, + 'attributes': result_chunks, + 'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}], + } + ) - results.append({'url': resp.request.headers['Referer'], - 'title': 'Wolfram|Alpha (' + infobox_title + ')', - 'content': result_content}) + results.append( + { + 'url': resp.request.headers['Referer'], + 'title': 'Wolfram|Alpha (' + infobox_title + ')', + 'content': result_content, + } + ) return results diff --git a/searx/engines/wordnik.py b/searx/engines/wordnik.py index 0c3785cfb..21eaeccc3 100644 --- a/searx/engines/wordnik.py +++ b/searx/engines/wordnik.py @@ -48,7 +48,7 @@ def response(resp): def_abbr = extract_text(def_item.xpath('.//abbr')).strip() def_text = extract_text(def_item).strip() if def_abbr: - def_text = def_text[len(def_abbr):].strip() + def_text = def_text[len(def_abbr) :].strip() src_defs.append((def_abbr, def_text)) definitions.append((src_text, src_defs)) @@ -66,9 +66,11 @@ def response(resp): infobox += f"
  • {def_abbr} {def_text}
  • " infobox += "" - results.append({ - 'infobox': word, - 'content': infobox, - }) + results.append( + { + 'infobox': word, + 'content': infobox, + } + ) return results diff --git a/searx/engines/www1x.py b/searx/engines/www1x.py index 96b8d680c..f6b82944d 100644 --- a/searx/engines/www1x.py +++ b/searx/engines/www1x.py @@ -46,12 +46,16 @@ def response(resp): thumbnail_src = urljoin(gallery_url, eval_xpath_getindex(link, './/img', 0).attrib['src']) # append result - results.append({'url': url, - 'title': title, - 'img_src': thumbnail_src, - 'content': '', - 'thumbnail_src': thumbnail_src, - 'template': 'images.html'}) + results.append( + { + 'url': url, + 'title': title, + 'img_src': thumbnail_src, + 'content': '', + 'thumbnail_src': thumbnail_src, + 'template': 'images.html', + } + ) # return results return results diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 08677b708..2737bf94a 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -56,7 +56,7 @@ Replacements are: """ -lang_all='en' +lang_all = 'en' '''Replacement ``{lang}`` in :py:obj:`search_url` if language ``all`` is selected. ''' @@ -110,9 +110,9 @@ requested by the user, the URL paramter is an empty string. The time_range_map = { 'day': 24, - 'week': 24*7, - 'month': 24*30, - 'year': 24*365, + 'week': 24 * 7, + 'month': 24 * 30, + 'year': 24 * 365, } '''Maps time range value from user to ``{time_range_val}`` in :py:obj:`time_range_url`. @@ -129,11 +129,7 @@ time_range_map = { safe_search_support = False '''Engine supports safe-search.''' -safe_search_map = { - 0: '&filter=none', - 1: '&filter=moderate', - 2: '&filter=strict' -} +safe_search_map = {0: '&filter=none', 1: '&filter=moderate', 2: '&filter=strict'} '''Maps safe-search value to ``{safe_search}`` in :py:obj:`search_url`. .. code:: yaml @@ -146,10 +142,9 @@ safe_search_map = { ''' -def request(query, params): - '''Build request parameters (see :ref:`engine request`). - ''' +def request(query, params): + '''Build request parameters (see :ref:`engine request`).''' lang = lang_all if params['language'] != 'all': lang = params['language'][:2] @@ -167,8 +162,8 @@ def request(query, params): 'query': urlencode({'q': query})[2:], 'lang': lang, 'pageno': (params['pageno'] - 1) * page_size + first_page_num, - 'time_range' : time_range, - 'safe_search' : safe_search, + 'time_range': time_range, + 'safe_search': safe_search, } params['url'] = search_url.format(**fargs) @@ -176,10 +171,9 @@ def request(query, params): return params -def response(resp): - '''Scrap *results* from the response (see :ref:`engine results`). - ''' +def response(resp): + '''Scrap *results* from the response (see :ref:`engine results`).''' results = [] dom = html.fromstring(resp.text) is_onion = 'onions' in categories @@ -200,10 +194,7 @@ def response(resp): # add alternative cached url if available if cached_xpath: - tmp_result['cached_url'] = ( - cached_url - + extract_text(eval_xpath_list(result, cached_xpath, min_len=1)) - ) + tmp_result['cached_url'] = cached_url + extract_text(eval_xpath_list(result, cached_xpath, min_len=1)) if is_onion: tmp_result['is_onion'] = True @@ -213,31 +204,27 @@ def response(resp): else: if cached_xpath: for url, title, content, cached in zip( - (extract_url(x, search_url) for - x in eval_xpath_list(dom, url_xpath)), + (extract_url(x, search_url) for x in eval_xpath_list(dom, url_xpath)), map(extract_text, eval_xpath_list(dom, title_xpath)), map(extract_text, eval_xpath_list(dom, content_xpath)), - map(extract_text, eval_xpath_list(dom, cached_xpath)) + map(extract_text, eval_xpath_list(dom, cached_xpath)), ): - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'cached_url': cached_url + cached, 'is_onion': is_onion - }) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'cached_url': cached_url + cached, + 'is_onion': is_onion, + } + ) else: for url, title, content in zip( - (extract_url(x, search_url) for - x in eval_xpath_list(dom, url_xpath)), + (extract_url(x, search_url) for x in eval_xpath_list(dom, url_xpath)), map(extract_text, eval_xpath_list(dom, title_xpath)), - map(extract_text, eval_xpath_list(dom, content_xpath)) + map(extract_text, eval_xpath_list(dom, content_xpath)), ): - results.append({ - 'url': url, - 'title': title, - 'content': content, - 'is_onion': is_onion - }) + results.append({'url': url, 'title': title, 'content': content, 'is_onion': is_onion}) if suggestion_xpath: for suggestion in eval_xpath(dom, suggestion_xpath): diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index fbd99c47b..12e7305db 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -30,18 +30,16 @@ http_digest_auth_pass = "" # search-url base_url = 'http://localhost:8090' -search_url = '/yacysearch.json?{query}'\ - '&startRecord={offset}'\ - '&maximumRecords={limit}'\ - '&contentdom={search_type}'\ - '&resource=global' +search_url = ( + '/yacysearch.json?{query}' + '&startRecord={offset}' + '&maximumRecords={limit}' + '&contentdom={search_type}' + '&resource=global' +) # yacy specific type-definitions -search_types = {'general': 'text', - 'images': 'image', - 'files': 'app', - 'music': 'audio', - 'videos': 'video'} +search_types = {'general': 'text', 'images': 'image', 'files': 'app', 'music': 'audio', 'videos': 'video'} # do search-request @@ -49,11 +47,9 @@ def request(query, params): offset = (params['pageno'] - 1) * number_of_results search_type = search_types.get(params.get('category'), '0') - params['url'] = base_url +\ - search_url.format(query=urlencode({'query': query}), - offset=offset, - limit=number_of_results, - search_type=search_type) + params['url'] = base_url + search_url.format( + query=urlencode({'query': query}), offset=offset, limit=number_of_results, search_type=search_type + ) if http_digest_auth_user and http_digest_auth_pass: params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass) @@ -93,21 +89,29 @@ def response(resp): continue # append result - results.append({'url': result_url, - 'title': result['title'], - 'content': '', - 'img_src': result['image'], - 'template': 'images.html'}) + results.append( + { + 'url': result_url, + 'title': result['title'], + 'content': '', + 'img_src': result['image'], + 'template': 'images.html', + } + ) # parse general results else: publishedDate = parser.parse(result['pubDate']) # append result - results.append({'url': result['link'], - 'title': result['title'], - 'content': html_to_text(result['description']), - 'publishedDate': publishedDate}) + results.append( + { + 'url': result['link'], + 'title': result['title'], + 'content': html_to_text(result['description']), + 'publishedDate': publishedDate, + } + ) # TODO parse video, audio and file results diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index bd6e6721c..08bde6665 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -50,59 +50,59 @@ language_aliases = { } lang2domain = { - 'zh_chs' : 'hk.search.yahoo.com', - 'zh_cht' : 'tw.search.yahoo.com', - 'en' : 'search.yahoo.com', - - 'bg' : 'search.yahoo.com', - 'cs' : 'search.yahoo.com', - 'da' : 'search.yahoo.com', - 'el' : 'search.yahoo.com', - 'et' : 'search.yahoo.com', - 'he' : 'search.yahoo.com', - 'hr' : 'search.yahoo.com', - 'ja' : 'search.yahoo.com', - 'ko' : 'search.yahoo.com', - 'sk' : 'search.yahoo.com', - 'sl' : 'search.yahoo.com', - + 'zh_chs': 'hk.search.yahoo.com', + 'zh_cht': 'tw.search.yahoo.com', + 'en': 'search.yahoo.com', + 'bg': 'search.yahoo.com', + 'cs': 'search.yahoo.com', + 'da': 'search.yahoo.com', + 'el': 'search.yahoo.com', + 'et': 'search.yahoo.com', + 'he': 'search.yahoo.com', + 'hr': 'search.yahoo.com', + 'ja': 'search.yahoo.com', + 'ko': 'search.yahoo.com', + 'sk': 'search.yahoo.com', + 'sl': 'search.yahoo.com', } """Map language to domain""" + def _get_language(params): lang = language_aliases.get(params['language']) if lang is None: - lang = match_language( - params['language'], supported_languages, language_aliases - ) + lang = match_language(params['language'], supported_languages, language_aliases) lang = lang.split('-')[0] - logger.debug("params['language']: %s --> %s" , params['language'], lang) + logger.debug("params['language']: %s --> %s", params['language'], lang) return lang + def request(query, params): """build request""" offset = (params['pageno'] - 1) * 7 + 1 - lang = _get_language(params) - age, btf = time_range_dict.get( - params['time_range'], ('', '')) + lang = _get_language(params) + age, btf = time_range_dict.get(params['time_range'], ('', '')) - args = urlencode({ - 'p' : query, - 'ei' : 'UTF-8', - 'fl' : 1, - 'vl' : 'lang_' + lang, - 'btf' : btf, - 'fr2' : 'time', - 'age' : age, - 'b' : offset, - 'xargs' :0 - }) + args = urlencode( + { + 'p': query, + 'ei': 'UTF-8', + 'fl': 1, + 'vl': 'lang_' + lang, + 'btf': btf, + 'fr2': 'time', + 'age': age, + 'b': offset, + 'xargs': 0, + } + ) domain = lang2domain.get(lang, '%s.search.yahoo.com' % lang) params['url'] = 'https://%s/search?%s' % (domain, args) return params + def parse_url(url_string): """remove yahoo-specific tracking-url""" @@ -121,6 +121,7 @@ def parse_url(url_string): end = min(endpositions) return unquote(url_string[start:end]) + def response(resp): """parse response""" @@ -140,18 +141,12 @@ def response(resp): offset = len(extract_text(title.xpath('span'))) title = extract_text(title)[offset:] - content = eval_xpath_getindex( - result, './/div[contains(@class, "compText")]', 0, default='' - ) + content = eval_xpath_getindex(result, './/div[contains(@class, "compText")]', 0, default='') if content: content = extract_text(content) # append result - results.append({ - 'url': url, - 'title': title, - 'content': content - }) + results.append({'url': url, 'title': title, 'content': content}) for suggestion in eval_xpath_list(dom, '//div[contains(@class, "AlsoTry")]//table//a'): # append suggestion @@ -167,6 +162,6 @@ def _fetch_supported_languages(resp): offset = len('lang_') for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'): - supported_languages.append( val[offset:] ) + supported_languages.append(val[offset:]) return supported_languages diff --git a/searx/engines/yahoo_news.py b/searx/engines/yahoo_news.py index ec07cd408..00f208b17 100644 --- a/searx/engines/yahoo_news.py +++ b/searx/engines/yahoo_news.py @@ -40,35 +40,35 @@ categories = ['news'] # search-url search_url = ( + # fmt: off 'https://news.search.yahoo.com/search' '?{query}&b={offset}' - ) + # fmt: on +) AGO_RE = re.compile(r'([0-9]+)\s*(year|month|week|day|minute|hour)') AGO_TIMEDELTA = { - 'minute': timedelta(minutes=1), - 'hour': timedelta(hours=1), - 'day': timedelta(days=1), - 'week': timedelta(days=7), - 'month': timedelta(days=30), - 'year': timedelta(days=365), + 'minute': timedelta(minutes=1), + 'hour': timedelta(hours=1), + 'day': timedelta(days=1), + 'week': timedelta(days=7), + 'month': timedelta(days=30), + 'year': timedelta(days=365), } + def request(query, params): offset = (params['pageno'] - 1) * 10 + 1 - params['url'] = search_url.format( - offset = offset, - query = urlencode({'p': query}) - ) + params['url'] = search_url.format(offset=offset, query=urlencode({'p': query})) logger.debug("query_url --> %s", params['url']) return params + def response(resp): results = [] dom = html.fromstring(resp.text) - # parse results for result in eval_xpath_list(dom, '//ol[contains(@class,"searchCenterMiddle")]//li'): @@ -80,12 +80,7 @@ def response(resp): content = extract_text(result.xpath('.//p')) img_src = eval_xpath_getindex(result, './/img/@data-src', 0, None) - item = { - 'url': url, - 'title': title, - 'content': content, - 'img_src' : img_src - } + item = {'url': url, 'title': title, 'content': content, 'img_src': img_src} pub_date = extract_text(result.xpath('.//span[contains(@class,"s-time")]')) ago = AGO_RE.search(pub_date) diff --git a/searx/engines/youtube_api.py b/searx/engines/youtube_api.py index ed27db07b..52db45960 100644 --- a/searx/engines/youtube_api.py +++ b/searx/engines/youtube_api.py @@ -27,17 +27,18 @@ api_key = None base_url = 'https://www.googleapis.com/youtube/v3/search' search_url = base_url + '?part=snippet&{query}&maxResults=20&key={api_key}' -embedded_url = '' +embedded_url = ( + '' +) base_youtube_url = 'https://www.youtube.com/watch?v=' # do search-request def request(query, params): - params['url'] = search_url.format(query=urlencode({'q': query}), - api_key=api_key) + params['url'] = search_url.format(query=urlencode({'q': query}), api_key=api_key) # add language tag if specified if params['language'] != 'all': @@ -79,13 +80,17 @@ def response(resp): embedded = embedded_url.format(videoid=videoid) # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'embedded': embedded, - 'thumbnail': thumbnail}) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'template': 'videos.html', + 'publishedDate': publishedDate, + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) # return results return results diff --git a/searx/engines/youtube_noapi.py b/searx/engines/youtube_noapi.py index 68b75bc72..239830cc7 100644 --- a/searx/engines/youtube_noapi.py +++ b/searx/engines/youtube_noapi.py @@ -30,14 +30,13 @@ search_url = base_url + '?search_query={query}&page={page}' time_range_url = '&sp=EgII{time_range}%253D%253D' # the key seems to be constant next_page_url = 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' -time_range_dict = {'day': 'Ag', - 'week': 'Aw', - 'month': 'BA', - 'year': 'BQ'} +time_range_dict = {'day': 'Ag', 'week': 'Aw', 'month': 'BA', 'year': 'BQ'} -embedded_url = '' +embedded_url = ( + '' +) base_youtube_url = 'https://www.youtube.com/watch?v=' @@ -51,10 +50,12 @@ def request(query, params): else: params['url'] = next_page_url params['method'] = 'POST' - params['data'] = dumps({ - 'context': {"client": {"clientName": "WEB", "clientVersion": "2.20210310.12.01"}}, - 'continuation': params['engine_data']['next_page_token'], - }) + params['data'] = dumps( + { + 'context': {"client": {"clientName": "WEB", "clientVersion": "2.20210310.12.01"}}, + 'continuation': params['engine_data']['next_page_token'], + } + ) params['headers']['Content-Type'] = 'application/json' params['headers']['Cookie'] = "CONSENT=YES+cb.%s-17-p0.en+F+941;" % datetime.now().strftime("%Y%m%d") @@ -71,34 +72,42 @@ def response(resp): def parse_next_page_response(response_text): results = [] result_json = loads(response_text) - for section in (result_json['onResponseReceivedCommands'][0] - .get('appendContinuationItemsAction')['continuationItems'][0] - .get('itemSectionRenderer')['contents']): + for section in ( + result_json['onResponseReceivedCommands'][0] + .get('appendContinuationItemsAction')['continuationItems'][0] + .get('itemSectionRenderer')['contents'] + ): if 'videoRenderer' not in section: continue section = section['videoRenderer'] content = "-" if 'descriptionSnippet' in section: content = ' '.join(x['text'] for x in section['descriptionSnippet']['runs']) - results.append({ - 'url': base_youtube_url + section['videoId'], - 'title': ' '.join(x['text'] for x in section['title']['runs']), - 'content': content, - 'author': section['ownerText']['runs'][0]['text'], - 'length': section['lengthText']['simpleText'], - 'template': 'videos.html', - 'embedded': embedded_url.format(videoid=section['videoId']), - 'thumbnail': section['thumbnail']['thumbnails'][-1]['url'], - }) + results.append( + { + 'url': base_youtube_url + section['videoId'], + 'title': ' '.join(x['text'] for x in section['title']['runs']), + 'content': content, + 'author': section['ownerText']['runs'][0]['text'], + 'length': section['lengthText']['simpleText'], + 'template': 'videos.html', + 'embedded': embedded_url.format(videoid=section['videoId']), + 'thumbnail': section['thumbnail']['thumbnails'][-1]['url'], + } + ) try: - token = result_json['onResponseReceivedCommands'][0]\ - .get('appendContinuationItemsAction')['continuationItems'][1]\ - .get('continuationItemRenderer')['continuationEndpoint']\ + token = ( + result_json['onResponseReceivedCommands'][0] + .get('appendContinuationItemsAction')['continuationItems'][1] + .get('continuationItemRenderer')['continuationEndpoint'] .get('continuationCommand')['token'] - results.append({ - "engine_data": token, - "key": "next_page_token", - }) + ) + results.append( + { + "engine_data": token, + "key": "next_page_token", + } + ) except: pass @@ -107,26 +116,32 @@ def parse_next_page_response(response_text): def parse_first_page_response(response_text): results = [] - results_data = response_text[response_text.find('ytInitialData'):] - results_data = results_data[results_data.find('{'):results_data.find(';')] + results_data = response_text[response_text.find('ytInitialData') :] + results_data = results_data[results_data.find('{') : results_data.find(';')] results_json = loads(results_data) if results_data else {} - sections = results_json.get('contents', {})\ - .get('twoColumnSearchResultsRenderer', {})\ - .get('primaryContents', {})\ - .get('sectionListRenderer', {})\ - .get('contents', []) + sections = ( + results_json.get('contents', {}) + .get('twoColumnSearchResultsRenderer', {}) + .get('primaryContents', {}) + .get('sectionListRenderer', {}) + .get('contents', []) + ) for section in sections: if "continuationItemRenderer" in section: - next_page_token = section["continuationItemRenderer"]\ - .get("continuationEndpoint", {})\ - .get("continuationCommand", {})\ + next_page_token = ( + section["continuationItemRenderer"] + .get("continuationEndpoint", {}) + .get("continuationCommand", {}) .get("token", "") + ) if next_page_token: - results.append({ - "engine_data": next_page_token, - "key": "next_page_token", - }) + results.append( + { + "engine_data": next_page_token, + "key": "next_page_token", + } + ) for video_container in section.get('itemSectionRenderer', {}).get('contents', []): video = video_container.get('videoRenderer', {}) videoid = video.get('videoId') @@ -140,14 +155,18 @@ def parse_first_page_response(response_text): length = get_text_from_json(video.get('lengthText', {})) # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'author': author, - 'length': length, - 'template': 'videos.html', - 'embedded': embedded, - 'thumbnail': thumbnail}) + results.append( + { + 'url': url, + 'title': title, + 'content': content, + 'author': author, + 'length': length, + 'template': 'videos.html', + 'embedded': embedded, + 'thumbnail': thumbnail, + } + ) # return results return results diff --git a/searx/engines/zlibrary.py b/searx/engines/zlibrary.py index 180e9e355..81d93ac84 100644 --- a/searx/engines/zlibrary.py +++ b/searx/engines/zlibrary.py @@ -31,25 +31,23 @@ categories = ['files'] paging = True base_url = '' + def init(engine_settings=None): - global base_url # pylint: disable=global-statement + global base_url # pylint: disable=global-statement if "base_url" not in engine_settings: resp = http_get('https://z-lib.org', timeout=5.0) if resp.ok: dom = html.fromstring(resp.text) - base_url = "https:" + extract_text(eval_xpath(dom, - './/a[contains(@class, "domain-check-link") and @data-mode="books"]/@href' - )) + base_url = "https:" + extract_text( + eval_xpath(dom, './/a[contains(@class, "domain-check-link") and @data-mode="books"]/@href') + ) logger.debug("using base_url: %s" % base_url) def request(query, params): search_url = base_url + '/s/{search_query}/?page={pageno}' - params['url'] = search_url.format( - search_query=quote(query), - pageno=params['pageno'] - ) + params['url'] = search_url.format(search_query=quote(query), pageno=params['pageno']) return params @@ -60,36 +58,34 @@ def response(resp): for item in dom.xpath('//div[@id="searchResultBox"]//div[contains(@class, "resItemBox")]'): result = {} - result["url"] = base_url + \ - item.xpath('(.//a[starts-with(@href, "/book/")])[1]/@href')[0] + result["url"] = base_url + item.xpath('(.//a[starts-with(@href, "/book/")])[1]/@href')[0] result["title"] = extract_text(eval_xpath(item, './/*[@itemprop="name"]')) - year = extract_text(eval_xpath( - item, './/div[contains(@class, "property_year")]//div[contains(@class, "property_value")]')) + year = extract_text( + eval_xpath(item, './/div[contains(@class, "property_year")]//div[contains(@class, "property_value")]') + ) if year: year = '(%s) ' % year - result["content"] = "{year}{authors}. {publisher}. Language: {language}. {file_type}. \ + result[ + "content" + ] = "{year}{authors}. {publisher}. Language: {language}. {file_type}. \ Book rating: {book_rating}, book quality: {book_quality}".format( - year = year, - authors = extract_text(eval_xpath(item, './/div[@class="authors"]')), - publisher = extract_text(eval_xpath(item, './/div[@title="Publisher"]')), - file_type = extract_text( - eval_xpath( - item, - './/div[contains(@class, "property__file")]//div[contains(@class, "property_value")]')), - language = extract_text( - eval_xpath( - item, - './/div[contains(@class, "property_language")]//div[contains(@class, "property_value")]')), - book_rating = extract_text( - eval_xpath( - item, './/span[contains(@class, "book-rating-interest-score")]')), - book_quality = extract_text( - eval_xpath( - item, './/span[contains(@class, "book-rating-quality-score")]')), - ) + year=year, + authors=extract_text(eval_xpath(item, './/div[@class="authors"]')), + publisher=extract_text(eval_xpath(item, './/div[@title="Publisher"]')), + file_type=extract_text( + eval_xpath(item, './/div[contains(@class, "property__file")]//div[contains(@class, "property_value")]') + ), + language=extract_text( + eval_xpath( + item, './/div[contains(@class, "property_language")]//div[contains(@class, "property_value")]' + ) + ), + book_rating=extract_text(eval_xpath(item, './/span[contains(@class, "book-rating-interest-score")]')), + book_quality=extract_text(eval_xpath(item, './/span[contains(@class, "book-rating-quality-score")]')), + ) result["img_src"] = extract_text(eval_xpath(item, './/img[contains(@class, "cover")]/@data-src')) diff --git a/searx/exceptions.py b/searx/exceptions.py index 67a282da2..1b106d40c 100644 --- a/searx/exceptions.py +++ b/searx/exceptions.py @@ -21,7 +21,6 @@ class SearxException(Exception): class SearxParameterException(SearxException): - def __init__(self, name, value): if value == '' or value is None: message = 'Empty ' + name + ' parameter' diff --git a/searx/external_urls.py b/searx/external_urls.py index 11c6a32d9..2657dba4b 100644 --- a/searx/external_urls.py +++ b/searx/external_urls.py @@ -8,7 +8,7 @@ IMDB_PREFIX_TO_URL_ID = { 'mn': 'imdb_name', 'ch': 'imdb_character', 'co': 'imdb_company', - 'ev': 'imdb_event' + 'ev': 'imdb_event', } HTTP_WIKIMEDIA_IMAGE = 'http://commons.wikimedia.org/wiki/Special:FilePath/' @@ -20,9 +20,9 @@ def get_imdb_url_id(imdb_item_id): def get_wikimedia_image_id(url): if url.startswith(HTTP_WIKIMEDIA_IMAGE): - return url[len(HTTP_WIKIMEDIA_IMAGE):] + return url[len(HTTP_WIKIMEDIA_IMAGE) :] if url.startswith('File:'): - return url[len('File:'):] + return url[len('File:') :] return url @@ -52,10 +52,12 @@ def get_external_url(url_id, item_id, alternative="default"): def get_earth_coordinates_url(latitude, longitude, osm_zoom, alternative='default'): - url = get_external_url('map', None, alternative)\ - .replace('${latitude}', str(latitude))\ - .replace('${longitude}', str(longitude))\ + url = ( + get_external_url('map', None, alternative) + .replace('${latitude}', str(latitude)) + .replace('${longitude}', str(longitude)) .replace('${zoom}', str(osm_zoom)) + ) return url diff --git a/searx/flaskfix.py b/searx/flaskfix.py index 47aabfa53..326c4b981 100644 --- a/searx/flaskfix.py +++ b/searx/flaskfix.py @@ -29,6 +29,7 @@ class ReverseProxyPathFix: :param wsgi_app: the WSGI application ''' + # pylint: disable=too-few-public-methods def __init__(self, wsgi_app): @@ -58,7 +59,7 @@ class ReverseProxyPathFix: environ['SCRIPT_NAME'] = script_name path_info = environ['PATH_INFO'] if path_info.startswith(script_name): - environ['PATH_INFO'] = path_info[len(script_name):] + environ['PATH_INFO'] = path_info[len(script_name) :] scheme = self.scheme or environ.get('HTTP_X_SCHEME', '') if scheme: diff --git a/searx/languages.py b/searx/languages.py index c44eb0b9e..1f157e517 100644 --- a/searx/languages.py +++ b/searx/languages.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- # list of language codes # this file is generated automatically by utils/fetch_languages.py -language_codes = \ -( ('af-ZA', 'Afrikaans', '', 'Afrikaans'), +language_codes = ( + ('af-ZA', 'Afrikaans', '', 'Afrikaans'), ('ar-EG', 'العربية', '', 'Arabic'), ('be-BY', 'Беларуская', '', 'Belarusian'), ('bg-BG', 'Български', '', 'Bulgarian'), @@ -74,4 +74,5 @@ language_codes = \ ('zh', '中文', '', 'Chinese'), ('zh-CN', '中文', '中国', 'Chinese'), ('zh-HK', '中文', '中國香港特別行政區', 'Chinese'), - ('zh-TW', '中文', '台灣', 'Chinese')) \ No newline at end of file + ('zh-TW', '中文', '台灣', 'Chinese'), +) diff --git a/searx/locales.py b/searx/locales.py index b791f35f3..62f64204f 100644 --- a/searx/locales.py +++ b/searx/locales.py @@ -51,11 +51,10 @@ def _get_locale_name(locale, locale_name): def initialize_locales(directory): - """Initialize global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`. - """ + """Initialize global names :py:obj:`LOCALE_NAMES`, :py:obj:`RTL_LOCALES`.""" for dirname in sorted(os.listdir(directory)): # Based on https://flask-babel.tkte.ch/_modules/flask_babel.html#Babel.list_translations - if not os.path.isdir( os.path.join(directory, dirname, 'LC_MESSAGES') ): + if not os.path.isdir(os.path.join(directory, dirname, 'LC_MESSAGES')): continue locale_name = dirname.replace('_', '-') info = LOCALE_NAMES.get(locale_name) diff --git a/searx/metrics/__init__.py b/searx/metrics/__init__.py index 995f182af..37f0ba121 100644 --- a/searx/metrics/__init__.py +++ b/searx/metrics/__init__.py @@ -12,11 +12,19 @@ from searx.engines import engines from .models import HistogramStorage, CounterStorage from .error_recorder import count_error, count_exception, errors_per_engines -__all__ = ["initialize", - "get_engines_stats", "get_engine_errors", - "histogram", "histogram_observe", "histogram_observe_time", - "counter", "counter_inc", "counter_add", - "count_error", "count_exception"] +__all__ = [ + "initialize", + "get_engines_stats", + "get_engine_errors", + "histogram", + "histogram_observe", + "histogram_observe_time", + "counter", + "counter_inc", + "counter_add", + "count_error", + "count_exception", +] ENDPOINTS = {'search'} @@ -72,7 +80,7 @@ def initialize(engine_names=None): # max_timeout = max of all the engine.timeout max_timeout = 2 - for engine_name in (engine_names or engines): + for engine_name in engine_names or engines: if engine_name in engines: max_timeout = max(max_timeout, engines[engine_name].timeout) @@ -81,7 +89,7 @@ def initialize(engine_names=None): histogram_size = int(1.5 * max_timeout / histogram_width) # engines - for engine_name in (engine_names or engines): + for engine_name in engine_names or engines: # search count counter_storage.configure('engine', engine_name, 'search', 'count', 'sent') counter_storage.configure('engine', engine_name, 'search', 'count', 'successful') @@ -112,17 +120,19 @@ def get_engine_errors(engline_name_list): r = [] for context, count in sorted_context_count_list: percentage = round(20 * count / sent_search_count) * 5 - r.append({ - 'filename': context.filename, - 'function': context.function, - 'line_no': context.line_no, - 'code': context.code, - 'exception_classname': context.exception_classname, - 'log_message': context.log_message, - 'log_parameters': context.log_parameters, - 'secondary': context.secondary, - 'percentage': percentage, - }) + r.append( + { + 'filename': context.filename, + 'function': context.function, + 'line_no': context.line_no, + 'code': context.code, + 'exception_classname': context.exception_classname, + 'log_message': context.log_message, + 'log_parameters': context.log_parameters, + 'secondary': context.secondary, + 'percentage': percentage, + } + ) result[engine_name] = sorted(r, reverse=True, key=lambda d: d['percentage']) return result diff --git a/searx/metrics/error_recorder.py b/searx/metrics/error_recorder.py index 37594e5e8..76d27f64f 100644 --- a/searx/metrics/error_recorder.py +++ b/searx/metrics/error_recorder.py @@ -3,8 +3,12 @@ import inspect from json import JSONDecodeError from urllib.parse import urlparse from httpx import HTTPError, HTTPStatusError -from searx.exceptions import (SearxXPathSyntaxException, SearxEngineXPathException, SearxEngineAPIException, - SearxEngineAccessDeniedException) +from searx.exceptions import ( + SearxXPathSyntaxException, + SearxEngineXPathException, + SearxEngineAPIException, + SearxEngineAccessDeniedException, +) from searx import searx_parent_dir from searx.engines import engines @@ -14,8 +18,16 @@ errors_per_engines = {} class ErrorContext: - __slots__ = ('filename', 'function', 'line_no', 'code', 'exception_classname', - 'log_message', 'log_parameters', 'secondary') + __slots__ = ( + 'filename', + 'function', + 'line_no', + 'code', + 'exception_classname', + 'log_message', + 'log_parameters', + 'secondary', + ) def __init__(self, filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary): self.filename = filename @@ -30,19 +42,41 @@ class ErrorContext: def __eq__(self, o) -> bool: if not isinstance(o, ErrorContext): return False - return self.filename == o.filename and self.function == o.function and self.line_no == o.line_no\ - and self.code == o.code and self.exception_classname == o.exception_classname\ - and self.log_message == o.log_message and self.log_parameters == o.log_parameters \ + return ( + self.filename == o.filename + and self.function == o.function + and self.line_no == o.line_no + and self.code == o.code + and self.exception_classname == o.exception_classname + and self.log_message == o.log_message + and self.log_parameters == o.log_parameters and self.secondary == o.secondary + ) def __hash__(self): - return hash((self.filename, self.function, self.line_no, self.code, self.exception_classname, self.log_message, - self.log_parameters, self.secondary)) + return hash( + ( + self.filename, + self.function, + self.line_no, + self.code, + self.exception_classname, + self.log_message, + self.log_parameters, + self.secondary, + ) + ) def __repr__(self): - return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}) {!r}".\ - format(self.filename, self.line_no, self.code, self.exception_classname, self.log_message, - self.log_parameters, self.secondary) + return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}) {!r}".format( + self.filename, + self.line_no, + self.code, + self.exception_classname, + self.log_message, + self.log_parameters, + self.secondary, + ) def add_error_context(engine_name: str, error_context: ErrorContext) -> None: @@ -68,8 +102,9 @@ def get_hostname(exc: HTTPError) -> typing.Optional[None]: return urlparse(url).netloc -def get_request_exception_messages(exc: HTTPError)\ - -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]: +def get_request_exception_messages( + exc: HTTPError, +) -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]: url = None status_code = None reason = None @@ -90,11 +125,11 @@ def get_request_exception_messages(exc: HTTPError)\ def get_messages(exc, filename) -> typing.Tuple: if isinstance(exc, JSONDecodeError): - return (exc.msg, ) + return (exc.msg,) if isinstance(exc, TypeError): - return (str(exc), ) + return (str(exc),) if isinstance(exc, ValueError) and 'lxml' in filename: - return (str(exc), ) + return (str(exc),) if isinstance(exc, HTTPError): return get_request_exception_messages(exc) if isinstance(exc, SearxXPathSyntaxException): @@ -102,9 +137,9 @@ def get_messages(exc, filename) -> typing.Tuple: if isinstance(exc, SearxEngineXPathException): return (exc.xpath_str, exc.message) if isinstance(exc, SearxEngineAPIException): - return (str(exc.args[0]), ) + return (str(exc.args[0]),) if isinstance(exc, SearxEngineAccessDeniedException): - return (exc.message, ) + return (exc.message,) return () @@ -121,7 +156,7 @@ def get_error_context(framerecords, exception_classname, log_message, log_parame searx_frame = get_trace(framerecords) filename = searx_frame.filename if filename.startswith(searx_parent_dir): - filename = filename[len(searx_parent_dir) + 1:] + filename = filename[len(searx_parent_dir) + 1 :] function = searx_frame.function line_no = searx_frame.lineno code = searx_frame.code_context[0].strip() @@ -140,8 +175,9 @@ def count_exception(engine_name: str, exc: Exception, secondary: bool = False) - del framerecords -def count_error(engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, - secondary: bool = False) -> None: +def count_error( + engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, secondary: bool = False +) -> None: framerecords = list(reversed(inspect.stack()[1:])) try: error_context = get_error_context(framerecords, None, log_message, log_parameters or (), secondary) diff --git a/searx/metrics/models.py b/searx/metrics/models.py index 8936a51e3..d42569b7f 100644 --- a/searx/metrics/models.py +++ b/searx/metrics/models.py @@ -58,7 +58,7 @@ class Histogram: @property def quartile_percentage(self): - ''' Quartile in percentage ''' + '''Quartile in percentage''' with self._lock: if self._count > 0: return [int(q * 100 / self._count) for q in self._quartiles] diff --git a/searx/network/__init__.py b/searx/network/__init__.py index 82959e355..7d02a0014 100644 --- a/searx/network/__init__.py +++ b/searx/network/__init__.py @@ -35,7 +35,7 @@ except ImportError: self._count.release() def get(self): - if not self._count.acquire(True): #pylint: disable=consider-using-with + if not self._count.acquire(True): # pylint: disable=consider-using-with raise Empty return self._queue.popleft() @@ -43,6 +43,7 @@ except ImportError: THREADLOCAL = threading.local() """Thread-local data is data for thread specific values.""" + def reset_time_for_thread(): THREADLOCAL.total_time = 0 @@ -187,10 +188,7 @@ async def stream_chunk_to_queue(network, queue, method, url, **kwargs): def _stream_generator(method, url, **kwargs): queue = SimpleQueue() network = get_context_network() - future = asyncio.run_coroutine_threadsafe( - stream_chunk_to_queue(network, queue, method, url, **kwargs), - get_loop() - ) + future = asyncio.run_coroutine_threadsafe(stream_chunk_to_queue(network, queue, method, url, **kwargs), get_loop()) # yield chunks obj_or_exception = queue.get() @@ -203,10 +201,7 @@ def _stream_generator(method, url, **kwargs): def _close_response_method(self): - asyncio.run_coroutine_threadsafe( - self.aclose(), - get_loop() - ) + asyncio.run_coroutine_threadsafe(self.aclose(), get_loop()) # reach the end of _self.generator ( _stream_generator ) to an avoid memory leak. # it makes sure that : # * the httpx response is closed (see the stream_chunk_to_queue function) diff --git a/searx/network/client.py b/searx/network/client.py index a6cec352d..cd1e41460 100644 --- a/searx/network/client.py +++ b/searx/network/client.py @@ -10,12 +10,7 @@ import anyio import httpcore import httpx from httpx_socks import AsyncProxyTransport -from python_socks import ( - parse_proxy_url, - ProxyConnectionError, - ProxyTimeoutError, - ProxyError -) +from python_socks import parse_proxy_url, ProxyConnectionError, ProxyTimeoutError, ProxyError from searx import logger @@ -41,9 +36,7 @@ TRANSPORT_KWARGS = { # pylint: disable=protected-access -async def close_connections_for_url( - connection_pool: httpcore.AsyncConnectionPool, url: httpcore._utils.URL -): +async def close_connections_for_url(connection_pool: httpcore.AsyncConnectionPool, url: httpcore._utils.URL): origin = httpcore._utils.url_to_origin(url) logger.debug('Drop connections for %r', origin) @@ -54,6 +47,8 @@ async def close_connections_for_url( await connection.aclose() except httpx.NetworkError as e: logger.warning('Error closing an existing connection', exc_info=e) + + # pylint: enable=protected-access @@ -67,9 +62,7 @@ def get_sslcontexts(proxy_url=None, cert=None, verify=True, trust_env=True, http class AsyncHTTPTransportNoHttp(httpx.AsyncHTTPTransport): """Block HTTP request""" - async def handle_async_request( - self, method, url, headers=None, stream=None, extensions=None - ): + async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): raise httpx.UnsupportedProtocol('HTTP protocol is disabled') @@ -83,9 +76,7 @@ class AsyncProxyTransportFixed(AsyncProxyTransport): Note: AsyncProxyTransport inherit from AsyncConnectionPool """ - async def handle_async_request( - self, method, url, headers=None, stream=None, extensions=None - ): + async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): retry = 2 while retry > 0: retry -= 1 @@ -116,9 +107,7 @@ class AsyncProxyTransportFixed(AsyncProxyTransport): class AsyncHTTPTransportFixed(httpx.AsyncHTTPTransport): """Fix httpx.AsyncHTTPTransport""" - async def handle_async_request( - self, method, url, headers=None, stream=None, extensions=None - ): + async def handle_async_request(self, method, url, headers=None, stream=None, extensions=None): retry = 2 while retry > 0: retry -= 1 @@ -152,14 +141,17 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit rdns = False socks5h = 'socks5h://' if proxy_url.startswith(socks5h): - proxy_url = 'socks5://' + proxy_url[len(socks5h):] + proxy_url = 'socks5://' + proxy_url[len(socks5h) :] rdns = True proxy_type, proxy_host, proxy_port, proxy_username, proxy_password = parse_proxy_url(proxy_url) verify = get_sslcontexts(proxy_url, None, True, False, http2) if verify is True else verify return AsyncProxyTransportFixed( - proxy_type=proxy_type, proxy_host=proxy_host, proxy_port=proxy_port, - username=proxy_username, password=proxy_password, + proxy_type=proxy_type, + proxy_host=proxy_host, + proxy_port=proxy_port, + username=proxy_username, + password=proxy_password, rdns=rdns, loop=get_loop(), verify=verify, @@ -169,7 +161,7 @@ def get_transport_for_socks_proxy(verify, http2, local_address, proxy_url, limit max_keepalive_connections=limit.max_keepalive_connections, keepalive_expiry=limit.keepalive_expiry, retries=retries, - **TRANSPORT_KWARGS + **TRANSPORT_KWARGS, ) @@ -183,36 +175,40 @@ def get_transport(verify, http2, local_address, proxy_url, limit, retries): proxy=httpx._config.Proxy(proxy_url) if proxy_url else None, limits=limit, retries=retries, - **TRANSPORT_KWARGS + **TRANSPORT_KWARGS, ) def new_client( - # pylint: disable=too-many-arguments - enable_http, verify, enable_http2, - max_connections, max_keepalive_connections, keepalive_expiry, - proxies, local_address, retries, max_redirects, hook_log_response ): + # pylint: disable=too-many-arguments + enable_http, + verify, + enable_http2, + max_connections, + max_keepalive_connections, + keepalive_expiry, + proxies, + local_address, + retries, + max_redirects, + hook_log_response, +): limit = httpx.Limits( max_connections=max_connections, max_keepalive_connections=max_keepalive_connections, - keepalive_expiry=keepalive_expiry + keepalive_expiry=keepalive_expiry, ) # See https://www.python-httpx.org/advanced/#routing mounts = {} for pattern, proxy_url in proxies.items(): if not enable_http and pattern.startswith('http://'): continue - if (proxy_url.startswith('socks4://') - or proxy_url.startswith('socks5://') - or proxy_url.startswith('socks5h://') - ): + if proxy_url.startswith('socks4://') or proxy_url.startswith('socks5://') or proxy_url.startswith('socks5h://'): mounts[pattern] = get_transport_for_socks_proxy( verify, enable_http2, local_address, proxy_url, limit, retries ) else: - mounts[pattern] = get_transport( - verify, enable_http2, local_address, proxy_url, limit, retries - ) + mounts[pattern] = get_transport(verify, enable_http2, local_address, proxy_url, limit, retries) if not enable_http: mounts['http://'] = AsyncHTTPTransportNoHttp() @@ -221,7 +217,7 @@ def new_client( event_hooks = None if hook_log_response: - event_hooks = {'response': [ hook_log_response ]} + event_hooks = {'response': [hook_log_response]} return httpx.AsyncClient( transport=transport, diff --git a/searx/network/network.py b/searx/network/network.py index 613b9ff27..9e14e14bd 100644 --- a/searx/network/network.py +++ b/searx/network/network.py @@ -31,39 +31,49 @@ PROXY_PATTERN_MAPPING = { 'socks5h:': 'socks5h://', } -ADDRESS_MAPPING = { - 'ipv4': '0.0.0.0', - 'ipv6': '::' -} +ADDRESS_MAPPING = {'ipv4': '0.0.0.0', 'ipv6': '::'} class Network: __slots__ = ( - 'enable_http', 'verify', 'enable_http2', - 'max_connections', 'max_keepalive_connections', 'keepalive_expiry', - 'local_addresses', 'proxies', 'using_tor_proxy', 'max_redirects', 'retries', 'retry_on_http_error', - '_local_addresses_cycle', '_proxies_cycle', '_clients', '_logger' + 'enable_http', + 'verify', + 'enable_http2', + 'max_connections', + 'max_keepalive_connections', + 'keepalive_expiry', + 'local_addresses', + 'proxies', + 'using_tor_proxy', + 'max_redirects', + 'retries', + 'retry_on_http_error', + '_local_addresses_cycle', + '_proxies_cycle', + '_clients', + '_logger', ) _TOR_CHECK_RESULT = {} def __init__( - # pylint: disable=too-many-arguments - self, - enable_http=True, - verify=True, - enable_http2=False, - max_connections=None, - max_keepalive_connections=None, - keepalive_expiry=None, - proxies=None, - using_tor_proxy=False, - local_addresses=None, - retries=0, - retry_on_http_error=None, - max_redirects=30, - logger_name=None): + # pylint: disable=too-many-arguments + self, + enable_http=True, + verify=True, + enable_http2=False, + max_connections=None, + max_keepalive_connections=None, + keepalive_expiry=None, + proxies=None, + using_tor_proxy=False, + local_addresses=None, + retries=0, + retry_on_http_error=None, + max_redirects=30, + logger_name=None, + ): self.enable_http = enable_http self.verify = verify @@ -144,9 +154,7 @@ class Network: response_line = f"{response.http_version} {status}" content_type = response.headers.get("Content-Type") content_type = f' ({content_type})' if content_type else '' - self._logger.debug( - f'HTTP Request: {request.method} {request.url} "{response_line}"{content_type}' - ) + self._logger.debug(f'HTTP Request: {request.method} {request.url} "{response_line}"{content_type}') @staticmethod async def check_tor_proxy(client: httpx.AsyncClient, proxies) -> bool: @@ -187,7 +195,7 @@ class Network: local_address, 0, max_redirects, - hook_log_response + hook_log_response, ) if self.using_tor_proxy and not await self.check_tor_proxy(client, proxies): await client.aclose() @@ -201,6 +209,7 @@ class Network: await client.aclose() except httpx.HTTPError: pass + await asyncio.gather(*[close_client(client) for client in self._clients.values()], return_exceptions=False) @staticmethod @@ -214,7 +223,8 @@ class Network: def is_valid_respones(self, response): # pylint: disable=too-many-boolean-expressions - if ((self.retry_on_http_error is True and 400 <= response.status_code <= 599) + if ( + (self.retry_on_http_error is True and 400 <= response.status_code <= 599) or (isinstance(self.retry_on_http_error, list) and response.status_code in self.retry_on_http_error) or (isinstance(self.retry_on_http_error, int) and response.status_code == self.retry_on_http_error) ): @@ -269,6 +279,7 @@ def check_network_configuration(): network._logger.exception('Error') # pylint: disable=protected-access exception_count += 1 return exception_count + future = asyncio.run_coroutine_threadsafe(check(), get_loop()) exception_count = future.result() if exception_count > 0: @@ -279,6 +290,7 @@ def initialize(settings_engines=None, settings_outgoing=None): # pylint: disable=import-outside-toplevel) from searx.engines import engines from searx import settings + # pylint: enable=import-outside-toplevel) settings_engines = settings_engines or settings['engines'] diff --git a/searx/network/raise_for_httperror.py b/searx/network/raise_for_httperror.py index a2f554614..414074977 100644 --- a/searx/network/raise_for_httperror.py +++ b/searx/network/raise_for_httperror.py @@ -10,13 +10,14 @@ from searx.exceptions import ( SearxEngineAccessDeniedException, ) + def is_cloudflare_challenge(resp): if resp.status_code in [429, 503]: - if (('__cf_chl_jschl_tk__=' in resp.text) - or ('/cdn-cgi/challenge-platform/' in resp.text - and 'orchestrate/jsch/v1' in resp.text - and 'window._cf_chl_enter(' in resp.text - )): + if ('__cf_chl_jschl_tk__=' in resp.text) or ( + '/cdn-cgi/challenge-platform/' in resp.text + and 'orchestrate/jsch/v1' in resp.text + and 'window._cf_chl_enter(' in resp.text + ): return True if resp.status_code == 403 and '__cf_chl_captcha_tk__=' in resp.text: return True @@ -32,21 +33,14 @@ def raise_for_cloudflare_captcha(resp): if is_cloudflare_challenge(resp): # https://support.cloudflare.com/hc/en-us/articles/200170136-Understanding-Cloudflare-Challenge-Passage-Captcha- # suspend for 2 weeks - raise SearxEngineCaptchaException( - message='Cloudflare CAPTCHA', - suspended_time=3600 * 24 * 15 - ) + raise SearxEngineCaptchaException(message='Cloudflare CAPTCHA', suspended_time=3600 * 24 * 15) if is_cloudflare_firewall(resp): - raise SearxEngineAccessDeniedException( - message='Cloudflare Firewall', suspended_time=3600 * 24 - ) + raise SearxEngineAccessDeniedException(message='Cloudflare Firewall', suspended_time=3600 * 24) def raise_for_recaptcha(resp): - if (resp.status_code == 503 - and '"https://www.google.com/recaptcha/' in resp.text - ): + if resp.status_code == 503 and '"https://www.google.com/recaptcha/' in resp.text: raise SearxEngineCaptchaException(message='ReCAPTCHA', suspended_time=3600 * 24 * 7) @@ -71,8 +65,7 @@ def raise_for_httperror(resp): raise_for_captcha(resp) if resp.status_code in (402, 403): raise SearxEngineAccessDeniedException( - message='HTTP error ' + str(resp.status_code), - suspended_time=3600 * 24 + message='HTTP error ' + str(resp.status_code), suspended_time=3600 * 24 ) if resp.status_code == 429: raise SearxEngineTooManyRequestsException() diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index 91636fe33..7815c2099 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -17,15 +17,19 @@ from searx import logger, settings logger = logger.getChild("plugins") required_attrs = ( + # fmt: off ("name", str), ("description", str), ("default_on", bool) + # fmt: on ) optional_attrs = ( + # fmt: off ("js_dependencies", tuple), ("css_dependencies", tuple), ("preference_section", str), + # fmt: on ) @@ -47,11 +51,7 @@ def sync_resource(base_path, resource_path, name, target_dir, plugin_dir): dep_stat = stat(dep_path) utime(resource_path, ns=(dep_stat.st_atime_ns, dep_stat.st_mtime_ns)) except IOError: - logger.critical( - "failed to copy plugin resource {0} for plugin {1}".format( - file_name, name - ) - ) + logger.critical("failed to copy plugin resource {0} for plugin {1}".format(file_name, name)) sys.exit(3) # returning with the web path of the resource @@ -62,36 +62,28 @@ def prepare_package_resources(plugin, plugin_module_name): plugin_base_path = dirname(abspath(plugin.__file__)) plugin_dir = plugin_module_name - target_dir = join( - settings["ui"]["static_path"], "plugins/external_plugins", plugin_dir - ) + target_dir = join(settings["ui"]["static_path"], "plugins/external_plugins", plugin_dir) try: makedirs(target_dir, exist_ok=True) except IOError: - logger.critical( - "failed to create resource directory {0} for plugin {1}".format( - target_dir, plugin_module_name - ) - ) + logger.critical("failed to create resource directory {0} for plugin {1}".format(target_dir, plugin_module_name)) sys.exit(3) resources = [] if hasattr(plugin, "js_dependencies"): resources.extend(map(basename, plugin.js_dependencies)) - plugin.js_dependencies = ([ - sync_resource( - plugin_base_path, x, plugin_module_name, target_dir, plugin_dir - ) for x in plugin.js_dependencies - ]) + plugin.js_dependencies = [ + sync_resource(plugin_base_path, x, plugin_module_name, target_dir, plugin_dir) + for x in plugin.js_dependencies + ] if hasattr(plugin, "css_dependencies"): resources.extend(map(basename, plugin.css_dependencies)) - plugin.css_dependencies = ([ - sync_resource( - plugin_base_path, x, plugin_module_name, target_dir, plugin_dir - ) for x in plugin.css_dependencies - ]) + plugin.css_dependencies = [ + sync_resource(plugin_base_path, x, plugin_module_name, target_dir, plugin_dir) + for x in plugin.css_dependencies + ] for f in listdir(target_dir): if basename(f) not in resources: @@ -100,9 +92,7 @@ def prepare_package_resources(plugin, plugin_module_name): remove(resource_path) except IOError: logger.critical( - "failed to remove unused resource file {0} for plugin {1}".format( - resource_path, plugin_module_name - ) + "failed to remove unused resource file {0} for plugin {1}".format(resource_path, plugin_module_name) ) sys.exit(3) @@ -133,9 +123,7 @@ def load_plugin(plugin_module_name, external): for plugin_attr, plugin_attr_type in required_attrs: if not hasattr(plugin, plugin_attr): - logger.critical( - '%s: missing attribute "%s", cannot load plugin', plugin, plugin_attr - ) + logger.critical('%s: missing attribute "%s", cannot load plugin', plugin, plugin_attr) sys.exit(3) attr = getattr(plugin, plugin_attr) if not isinstance(attr, plugin_attr_type): @@ -148,9 +136,7 @@ def load_plugin(plugin_module_name, external): sys.exit(3) for plugin_attr, plugin_attr_type in optional_attrs: - if not hasattr(plugin, plugin_attr) or not isinstance( - getattr(plugin, plugin_attr), plugin_attr_type - ): + if not hasattr(plugin, plugin_attr) or not isinstance(getattr(plugin, plugin_attr), plugin_attr_type): setattr(plugin, plugin_attr, plugin_attr_type()) if not hasattr(plugin, "preference_section"): @@ -160,19 +146,12 @@ def load_plugin(plugin_module_name, external): if plugin.preference_section == "query": for plugin_attr in ("query_keywords", "query_examples"): if not hasattr(plugin, plugin_attr): - logger.critical( - 'missing attribute "{0}", cannot load plugin: {1}'.format( - plugin_attr, plugin - ) - ) + logger.critical('missing attribute "{0}", cannot load plugin: {1}'.format(plugin_attr, plugin)) sys.exit(3) if settings.get("enabled_plugins"): # searx compatibility: plugin.name in settings['enabled_plugins'] - plugin.default_on = ( - plugin.name in settings["enabled_plugins"] - or plugin.id in settings["enabled_plugins"] - ) + plugin.default_on = plugin.name in settings["enabled_plugins"] or plugin.id in settings["enabled_plugins"] # copy ressources if this is an external plugin if external: @@ -189,9 +168,7 @@ def load_and_initialize_plugin(plugin_module_name, external, init_args): try: return plugin if plugin.init(*init_args) else None except Exception: # pylint: disable=broad-except - plugin.logger.exception( - "Exception while calling init, the plugin is disabled" - ) + plugin.logger.exception("Exception while calling init, the plugin is disabled") return None return plugin diff --git a/searx/plugins/oa_doi_rewrite.py b/searx/plugins/oa_doi_rewrite.py index 2dcc01e05..54d28bc9a 100644 --- a/searx/plugins/oa_doi_rewrite.py +++ b/searx/plugins/oa_doi_rewrite.py @@ -39,7 +39,7 @@ def on_result(request, search, result): if doi and len(doi) < 50: for suffix in ('/', '.pdf', '.xml', '/full', '/meta', '/abstract'): if doi.endswith(suffix): - doi = doi[:-len(suffix)] + doi = doi[: -len(suffix)] result['url'] = get_doi_resolver(request.preferences) + doi result['parsed_url'] = urlparse(result['url']) return True diff --git a/searx/plugins/search_on_category_select.py b/searx/plugins/search_on_category_select.py index 2a38cac78..48d537cee 100644 --- a/searx/plugins/search_on_category_select.py +++ b/searx/plugins/search_on_category_select.py @@ -15,9 +15,11 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2015 by Adam Tauber, ''' from flask_babel import gettext + name = gettext('Search on category select') -description = gettext('Perform search immediately if a category selected. ' - 'Disable to select multiple categories. (JavaScript required)') +description = gettext( + 'Perform search immediately if a category selected. ' 'Disable to select multiple categories. (JavaScript required)' +) default_on = True preference_section = 'ui' diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py index 053899483..29bd5ca5c 100644 --- a/searx/plugins/self_info.py +++ b/searx/plugins/self_info.py @@ -16,6 +16,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. ''' from flask_babel import gettext import re + name = gettext('Self Informations') description = gettext('Displays your IP if the query is "ip" and your user agent if the query contains "user agent".') default_on = True diff --git a/searx/plugins/tracker_url_remover.py b/searx/plugins/tracker_url_remover.py index 98ddddbcd..42c58e524 100644 --- a/searx/plugins/tracker_url_remover.py +++ b/searx/plugins/tracker_url_remover.py @@ -19,10 +19,12 @@ from flask_babel import gettext import re from urllib.parse import urlunparse, parse_qsl, urlencode -regexes = {re.compile(r'utm_[^&]+'), - re.compile(r'(wkey|wemail)[^&]*'), - re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'), - re.compile(r'&$')} +regexes = { + re.compile(r'utm_[^&]+'), + re.compile(r'(wkey|wemail)[^&]*'), + re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'), + re.compile(r'&$'), +} name = gettext('Tracker URL remover') description = gettext('Remove trackers arguments from the returned URL') diff --git a/searx/plugins/vim_hotkeys.py b/searx/plugins/vim_hotkeys.py index 47b830c79..fb61d413b 100644 --- a/searx/plugins/vim_hotkeys.py +++ b/searx/plugins/vim_hotkeys.py @@ -1,9 +1,11 @@ from flask_babel import gettext name = gettext('Vim-like hotkeys') -description = gettext('Navigate search results with Vim-like hotkeys ' - '(JavaScript required). ' - 'Press "h" key on main or result page to get help.') +description = gettext( + 'Navigate search results with Vim-like hotkeys ' + '(JavaScript required). ' + 'Press "h" key on main or result page to get help.' +) default_on = False preference_section = 'ui' diff --git a/searx/preferences.py b/searx/preferences.py index 4d0cc5c0a..2a9b0af0c 100644 --- a/searx/preferences.py +++ b/searx/preferences.py @@ -21,14 +21,12 @@ DOI_RESOLVERS = list(settings['doi_resolvers']) class MissingArgumentException(Exception): - """Exption from ``cls._post_init`` when a argument is missed. - """ + """Exption from ``cls._post_init`` when a argument is missed.""" class ValidationException(Exception): - """Exption from ``cls._post_init`` when configuration value is invalid. - """ + """Exption from ``cls._post_init`` when configuration value is invalid.""" class Setting: @@ -84,8 +82,7 @@ class EnumStringSetting(Setting): raise ValidationException('Invalid value: "{0}"'.format(selection)) def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" self._validate_selection(data) self.value = data @@ -104,8 +101,7 @@ class MultipleChoiceSetting(EnumStringSetting): self._validate_selections(self.value) def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" if data == '': self.value = [] return @@ -124,25 +120,23 @@ class MultipleChoiceSetting(EnumStringSetting): self.value.append(choice) def save(self, name, resp): - """Save cookie ``name`` in the HTTP reponse obect - """ + """Save cookie ``name`` in the HTTP reponse obect""" resp.set_cookie(name, ','.join(self.value), max_age=COOKIE_MAX_AGE) class SetSetting(Setting): - """Setting of values of type ``set`` (comma separated string) """ + """Setting of values of type ``set`` (comma separated string)""" + def _post_init(self): if not hasattr(self, 'values'): self.values = set() def get_value(self): - """Returns a string with comma separated values. - """ + """Returns a string with comma separated values.""" return ','.join(self.values) def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" if data == '': self.values = set() # pylint: disable=attribute-defined-outside-init return @@ -159,8 +153,7 @@ class SetSetting(Setting): self.values = set(elements) # pylint: disable=attribute-defined-outside-init def save(self, name, resp): - """Save cookie ``name`` in the HTTP reponse obect - """ + """Save cookie ``name`` in the HTTP reponse obect""" resp.set_cookie(name, ','.join(self.values), max_age=COOKIE_MAX_AGE) @@ -172,8 +165,7 @@ class SearchLanguageSetting(EnumStringSetting): raise ValidationException('Invalid language code: "{0}"'.format(selection)) def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" if data not in self.choices and data != self.value: # pylint: disable=no-member # hack to give some backwards compatibility with old language cookies data = str(data).replace('_', '-') @@ -199,8 +191,7 @@ class MapSetting(Setting): raise ValidationException('Invalid default value') def parse(self, data): - """Parse and validate ``data`` and store the result at ``self.value`` - """ + """Parse and validate ``data`` and store the result at ``self.value``""" # pylint: disable=no-member if data not in self.map: raise ValidationException('Invalid choice: {0}'.format(data)) @@ -208,14 +199,13 @@ class MapSetting(Setting): self.key = data # pylint: disable=attribute-defined-outside-init def save(self, name, resp): - """Save cookie ``name`` in the HTTP reponse obect - """ + """Save cookie ``name`` in the HTTP reponse obect""" if hasattr(self, 'key'): resp.set_cookie(name, self.key, max_age=COOKIE_MAX_AGE) class SwitchableSetting(Setting): - """ Base class for settings that can be turned on && off""" + """Base class for settings that can be turned on && off""" def _post_init(self): self.disabled = set() @@ -244,7 +234,7 @@ class SwitchableSetting(Setting): items = self.transform_form_items(items) self.disabled = set() # pylint: disable=attribute-defined-outside-init - self.enabled = set() # pylint: disable=attribute-defined-outside-init + self.enabled = set() # pylint: disable=attribute-defined-outside-init for choice in self.choices: # pylint: disable=no-member if choice['default_on']: if choice['id'] in items: @@ -254,8 +244,7 @@ class SwitchableSetting(Setting): self.enabled.add(choice['id']) def save(self, resp): # pylint: disable=arguments-differ - """Save cookie in the HTTP reponse obect - """ + """Save cookie in the HTTP reponse obect""" resp.set_cookie('disabled_{0}'.format(self.value), ','.join(self.disabled), max_age=COOKIE_MAX_AGE) resp.set_cookie('enabled_{0}'.format(self.value), ','.join(self.enabled), max_age=COOKIE_MAX_AGE) @@ -289,7 +278,7 @@ class EnginesSetting(SwitchableSetting): self.choices = transformed_choices def transform_form_items(self, items): - return [item[len('engine_'):].replace('_', ' ').replace(' ', '__') for item in items] + return [item[len('engine_') :].replace('_', ' ').replace(' ', '__') for item in items] def transform_values(self, values): if len(values) == 1 and next(iter(values)) == '': @@ -315,7 +304,7 @@ class PluginsSetting(SwitchableSetting): self.choices = transformed_choices def transform_form_items(self, items): - return [item[len('plugin_'):] for item in items] + return [item[len('plugin_') :] for item in items] class Preferences: @@ -325,6 +314,7 @@ class Preferences: super().__init__() self.key_value_settings = { + # fmt: off 'categories': MultipleChoiceSetting( ['general'], is_locked('categories'), @@ -422,6 +412,7 @@ class Preferences: 'False': False } ), + # fmt: on } self.engines = EnginesSetting('engines', choices=engines) @@ -466,19 +457,18 @@ class Preferences: continue self.key_value_settings[user_setting_name].parse(user_setting) elif user_setting_name == 'disabled_engines': - self.engines.parse_cookie((input_data.get('disabled_engines', ''), - input_data.get('enabled_engines', ''))) + self.engines.parse_cookie( + (input_data.get('disabled_engines', ''), input_data.get('enabled_engines', '')) + ) elif user_setting_name == 'disabled_plugins': - self.plugins.parse_cookie((input_data.get('disabled_plugins', ''), - input_data.get('enabled_plugins', ''))) + self.plugins.parse_cookie( + (input_data.get('disabled_plugins', ''), input_data.get('enabled_plugins', '')) + ) elif user_setting_name == 'tokens': self.tokens.parse(user_setting) - elif not any(user_setting_name.startswith(x) for x in [ - 'enabled_', - 'disabled_', - 'engine_', - 'category_', - 'plugin_']): + elif not any( + user_setting_name.startswith(x) for x in ['enabled_', 'disabled_', 'engine_', 'category_', 'plugin_'] + ): self.unknown_params[user_setting_name] = user_setting def parse_form(self, input_data): @@ -492,7 +482,7 @@ class Preferences: elif user_setting_name.startswith('engine_'): disabled_engines.append(user_setting_name) elif user_setting_name.startswith('category_'): - enabled_categories.append(user_setting_name[len('category_'):]) + enabled_categories.append(user_setting_name[len('category_') :]) elif user_setting_name.startswith('plugin_'): disabled_plugins.append(user_setting_name) elif user_setting_name == 'tokens': @@ -505,8 +495,7 @@ class Preferences: # cannot be used in case of engines or plugins def get_value(self, user_setting_name): - """Returns the value for ``user_setting_name`` - """ + """Returns the value for ``user_setting_name``""" ret_val = None if user_setting_name in self.key_value_settings: ret_val = self.key_value_settings[user_setting_name].get_value() @@ -515,8 +504,7 @@ class Preferences: return ret_val def save(self, resp): - """Save cookie in the HTTP reponse obect - """ + """Save cookie in the HTTP reponse obect""" for user_setting_name, user_setting in self.key_value_settings.items(): # pylint: disable=unnecessary-dict-index-lookup if self.key_value_settings[user_setting_name].locked: @@ -542,8 +530,7 @@ class Preferences: def is_locked(setting_name): - """Checks if a given setting name is locked by settings.yml - """ + """Checks if a given setting name is locked by settings.yml""" if 'preferences' not in settings: return False if 'lock' not in settings['preferences']: diff --git a/searx/query.py b/searx/query.py index 7f252e93f..b7f64fe82 100644 --- a/searx/query.py +++ b/searx/query.py @@ -40,7 +40,6 @@ class QueryPartParser(ABC): class TimeoutParser(QueryPartParser): - @staticmethod def check(raw_value): return raw_value[0] == '<' @@ -70,7 +69,6 @@ class TimeoutParser(QueryPartParser): class LanguageParser(QueryPartParser): - @staticmethod def check(raw_value): return raw_value[0] == ':' @@ -92,11 +90,9 @@ class LanguageParser(QueryPartParser): # if correct language-code is found # set it as new search-language - if (value == lang_id - or value == lang_name - or value == english_name - or value.replace('-', ' ') == country)\ - and value not in self.raw_text_query.languages: + if ( + value == lang_id or value == lang_name or value == english_name or value.replace('-', ' ') == country + ) and value not in self.raw_text_query.languages: found = True lang_parts = lang_id.split('-') if len(lang_parts) == 2: @@ -152,7 +148,6 @@ class LanguageParser(QueryPartParser): class ExternalBangParser(QueryPartParser): - @staticmethod def check(raw_value): return raw_value.startswith('!!') @@ -180,7 +175,6 @@ class ExternalBangParser(QueryPartParser): class BangParser(QueryPartParser): - @staticmethod def check(raw_value): return raw_value[0] == '!' or raw_value[0] == '?' @@ -208,9 +202,11 @@ class BangParser(QueryPartParser): if value in categories: # using all engines for that search, which # are declared under that categorie name - self.raw_text_query.enginerefs.extend(EngineRef(engine.name, value) - for engine in categories[value] - if (engine.name, value) not in self.raw_text_query.disabled_engines) + self.raw_text_query.enginerefs.extend( + EngineRef(engine.name, value) + for engine in categories[value] + if (engine.name, value) not in self.raw_text_query.disabled_engines + ) return True return False @@ -246,7 +242,7 @@ class RawTextQuery: TimeoutParser, # this force the timeout LanguageParser, # this force a language ExternalBangParser, # external bang (must be before BangParser) - BangParser # this force a engine or category + BangParser, # this force a engine or category ] def __init__(self, query, disabled_engines): @@ -281,8 +277,7 @@ class RawTextQuery: for i, query_part in enumerate(raw_query_parts): # part does only contain spaces, skip - if query_part.isspace()\ - or query_part == '': + if query_part.isspace() or query_part == '': continue # parse special commands @@ -324,14 +319,16 @@ class RawTextQuery: return self.getFullQuery() def __repr__(self): - return f"<{self.__class__.__name__} " \ - + f"query={self.query!r} " \ - + f"disabled_engines={self.disabled_engines!r}\n " \ - + f"languages={self.languages!r} " \ - + f"timeout_limit={self.timeout_limit!r} "\ - + f"external_bang={self.external_bang!r} " \ - + f"specific={self.specific!r} " \ - + f"enginerefs={self.enginerefs!r}\n " \ - + f"autocomplete_list={self.autocomplete_list!r}\n " \ - + f"query_parts={self.query_parts!r}\n " \ - + f"user_query_parts={self.user_query_parts!r} >" + return ( + f"<{self.__class__.__name__} " + + f"query={self.query!r} " + + f"disabled_engines={self.disabled_engines!r}\n " + + f"languages={self.languages!r} " + + f"timeout_limit={self.timeout_limit!r} " + + f"external_bang={self.external_bang!r} " + + f"specific={self.specific!r} " + + f"enginerefs={self.enginerefs!r}\n " + + f"autocomplete_list={self.autocomplete_list!r}\n " + + f"query_parts={self.query_parts!r}\n " + + f"user_query_parts={self.user_query_parts!r} >" + ) diff --git a/searx/results.py b/searx/results.py index 10a26aa3f..6ab751c56 100644 --- a/searx/results.py +++ b/searx/results.py @@ -47,12 +47,8 @@ def compare_urls(url_a, url_b): return False # remove / from the end of the url if required - path_a = url_a.path[:-1]\ - if url_a.path.endswith('/')\ - else url_a.path - path_b = url_b.path[:-1]\ - if url_b.path.endswith('/')\ - else url_b.path + path_a = url_a.path[:-1] if url_a.path.endswith('/') else url_a.path + path_b = url_b.path[:-1] if url_b.path.endswith('/') else url_b.path return unquote(path_a) == unquote(path_b) @@ -83,8 +79,9 @@ def merge_two_infoboxes(infobox1, infobox2): parsed_url2 = urlparse(url2.get('url', '')) entity_url2 = url2.get('entity') for url1 in urls1: - if (entity_url2 is not None and url1.get('entity') == entity_url2)\ - or compare_urls(urlparse(url1.get('url', '')), parsed_url2): + if (entity_url2 is not None and url1.get('entity') == entity_url2) or compare_urls( + urlparse(url1.get('url', '')), parsed_url2 + ): unique_url = False break if unique_url: @@ -115,8 +112,7 @@ def merge_two_infoboxes(infobox1, infobox2): attributeSet.add(entity) for attribute in infobox2.get('attributes', []): - if attribute.get('label') not in attributeSet\ - and attribute.get('entity') not in attributeSet: + if attribute.get('label') not in attributeSet and attribute.get('entity') not in attributeSet: attributes1.append(attribute) if 'content' in infobox2: @@ -144,9 +140,22 @@ def result_score(result): class ResultContainer: """docstring for ResultContainer""" - __slots__ = '_merged_results', 'infoboxes', 'suggestions', 'answers', 'corrections', '_number_of_results',\ - '_closed', 'paging', 'unresponsive_engines', 'timings', 'redirect_url', 'engine_data', 'on_result',\ - '_lock' + __slots__ = ( + '_merged_results', + 'infoboxes', + 'suggestions', + 'answers', + 'corrections', + '_number_of_results', + '_closed', + 'paging', + 'unresponsive_engines', + 'timings', + 'redirect_url', + 'engine_data', + 'on_result', + '_lock', + ) def __init__(self): super().__init__() @@ -208,8 +217,7 @@ class ResultContainer: if engine_name in engines: histogram_observe(standard_result_count, 'engine', engine_name, 'result', 'count') - if not self.paging and standard_result_count > 0 and engine_name in engines\ - and engines[engine_name].paging: + if not self.paging and standard_result_count > 0 and engine_name in engines and engines[engine_name].paging: self.paging = True def _merge_infobox(self, infobox): @@ -248,8 +256,7 @@ class ResultContainer: return True def _normalize_url_result(self, result): - """Return True if the result is valid - """ + """Return True if the result is valid""" result['parsed_url'] = urlparse(result['url']) # if the result has no scheme, use http as default @@ -280,8 +287,9 @@ class ResultContainer: for merged_result in self._merged_results: if 'parsed_url' not in merged_result: continue - if compare_urls(result['parsed_url'], merged_result['parsed_url'])\ - and result_template == merged_result.get('template'): + if compare_urls(result['parsed_url'], merged_result['parsed_url']) and result_template == merged_result.get( + 'template' + ): if result_template != 'images.html': # not an image, same template, same url : it's a duplicate return merged_result @@ -294,8 +302,7 @@ class ResultContainer: def __merge_duplicated_http_result(self, duplicated, result, position): # using content with more text - if result_content_len(result.get('content', '')) >\ - result_content_len(duplicated.get('content', '')): + if result_content_len(result.get('content', '')) > result_content_len(duplicated.get('content', '')): duplicated['content'] = result['content'] # merge all result's parameters not found in duplicate @@ -341,18 +348,20 @@ class ResultContainer: res['category'] = engine.categories[0] if len(engine.categories) > 0 else '' # FIXME : handle more than one category per engine - category = res['category']\ - + ':' + res.get('template', '')\ - + ':' + ('img_src' if 'img_src' in res or 'thumbnail' in res else '') + category = ( + res['category'] + + ':' + + res.get('template', '') + + ':' + + ('img_src' if 'img_src' in res or 'thumbnail' in res else '') + ) - current = None if category not in categoryPositions\ - else categoryPositions[category] + current = None if category not in categoryPositions else categoryPositions[category] # group with previous results using the same category # if the group can accept more result and is not too far # from the current position - if current is not None and (current['count'] > 0)\ - and (len(gresults) - current['index'] < 20): + if current is not None and (current['count'] > 0) and (len(gresults) - current['index'] < 20): # group with the previous results using # the same category with this one index = current['index'] diff --git a/searx/search/__init__.py b/searx/search/__init__.py index 0a3c5b3ac..d66f3362d 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -123,8 +123,11 @@ class Search: # Max & user query: From user query except if above max actual_timeout = min(query_timeout, max_request_timeout) - logger.debug("actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})" - .format(actual_timeout, default_timeout, query_timeout, max_request_timeout)) + logger.debug( + "actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})".format( + actual_timeout, default_timeout, query_timeout, max_request_timeout + ) + ) return requests, actual_timeout diff --git a/searx/search/checker/__main__.py b/searx/search/checker/__main__.py index 4ce4ca76b..1311288f3 100644 --- a/searx/search/checker/__main__.py +++ b/searx/search/checker/__main__.py @@ -37,12 +37,12 @@ else: stdout = io.TextIOWrapper( # pylint: disable=consider-using-with open(sys.stdout.fileno(), 'wb', 0), - write_through=True + write_through=True, ) stderr = io.TextIOWrapper( # pylint: disable=consider-using-with - open(sys.stderr.fileno(), 'wb', 0) - , write_through=True + open(sys.stderr.fileno(), 'wb', 0), + write_through=True, ) @@ -91,12 +91,21 @@ def run(engine_name_list, verbose): # call by setup.py def main(): parser = argparse.ArgumentParser(description='Check searx engines.') - parser.add_argument('engine_name_list', metavar='engine name', type=str, nargs='*', - help='engines name or shortcut list. Empty for all engines.') - parser.add_argument('--verbose', '-v', - action='store_true', dest='verbose', - help='Display details about the test results', - default=False) + parser.add_argument( + 'engine_name_list', + metavar='engine name', + type=str, + nargs='*', + help='engines name or shortcut list. Empty for all engines.', + ) + parser.add_argument( + '--verbose', + '-v', + action='store_true', + dest='verbose', + help='Display details about the test results', + default=False, + ) args = parser.parse_args() run(args.engine_name_list, args.verbose) diff --git a/searx/search/checker/background.py b/searx/search/checker/background.py index d9f11a71c..ff005dd91 100644 --- a/searx/search/checker/background.py +++ b/searx/search/checker/background.py @@ -23,10 +23,12 @@ running = threading.Lock() def _get_interval(every, error_msg): if isinstance(every, int): every = (every, every) - if not isinstance(every, (tuple, list))\ - or len(every) != 2\ - or not isinstance(every[0], int)\ - or not isinstance(every[1], int): + if ( + not isinstance(every, (tuple, list)) + or len(every) != 2 + or not isinstance(every[0], int) + or not isinstance(every[1], int) + ): raise SearxSettingsException(error_msg, None) return every @@ -50,14 +52,11 @@ def _set_result(result, include_timestamp=True): def run(): - if not running.acquire(blocking=False): # pylint: disable=consider-using-with + if not running.acquire(blocking=False): # pylint: disable=consider-using-with return try: logger.info('Starting checker') - result = { - 'status': 'ok', - 'engines': {} - } + result = {'status': 'ok', 'engines': {}} for name, processor in PROCESSORS.items(): logger.debug('Checking %s engine', name) checker = Checker(processor) diff --git a/searx/search/checker/impl.py b/searx/search/checker/impl.py index 626aa8ce0..c0dd966d0 100644 --- a/searx/search/checker/impl.py +++ b/searx/search/checker/impl.py @@ -25,6 +25,7 @@ from searx.metrics import counter_inc logger = logger.getChild('searx.search.checker') HTML_TAGS = [ + # fmt: off 'embed', 'iframe', 'object', 'param', 'picture', 'source', 'svg', 'math', 'canvas', 'noscript', 'script', 'del', 'ins', 'area', 'audio', 'img', 'map', 'track', 'video', 'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'dfn', 'em', 'i', 'kdb', 'mark', 'q', 'rb', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'small', @@ -32,6 +33,7 @@ HTML_TAGS = [ 'figcaption', 'figure', 'hr', 'li', 'ol', 'p', 'pre', 'ul', 'button', 'datalist', 'fieldset', 'form', 'input', 'label', 'legend', 'meter', 'optgroup', 'option', 'output', 'progress', 'select', 'textarea', 'applet', 'frame', 'frameset' + # fmt: on ] @@ -72,17 +74,23 @@ def _download_and_check_if_image(image_url: str) -> bool: try: # use "image_proxy" (avoid HTTP/2) network.set_context_network_name('image_proxy') - stream = network.stream('GET', image_url, timeout=10.0, allow_redirects=True, headers={ - 'User-Agent': gen_useragent(), - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US;q=0.5,en;q=0.3', - 'Accept-Encoding': 'gzip, deflate, br', - 'DNT': '1', - 'Connection': 'keep-alive', - 'Upgrade-Insecure-Requests': '1', - 'Sec-GPC': '1', - 'Cache-Control': 'max-age=0' - }) + stream = network.stream( + 'GET', + image_url, + timeout=10.0, + allow_redirects=True, + headers={ + 'User-Agent': gen_useragent(), + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'en-US;q=0.5,en;q=0.3', + 'Accept-Encoding': 'gzip, deflate, br', + 'DNT': '1', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + 'Sec-GPC': '1', + 'Cache-Control': 'max-age=0', + }, + ) r = next(stream) r.close() if r.status_code == 200: @@ -102,8 +110,7 @@ def _download_and_check_if_image(image_url: str) -> bool: def _is_url_image(image_url) -> bool: - """Normalize image_url - """ + """Normalize image_url""" if not isinstance(image_url, str): return False @@ -129,8 +136,9 @@ def _search_query_to_dict(search_query: SearchQuery) -> typing.Dict[str, typing. } -def _search_query_diff(sq1: SearchQuery, sq2: SearchQuery)\ - -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]: +def _search_query_diff( + sq1: SearchQuery, sq2: SearchQuery +) -> typing.Tuple[typing.Dict[str, typing.Any], typing.Dict[str, typing.Any]]: param1 = _search_query_to_dict(sq1) param2 = _search_query_to_dict(sq2) common = {} @@ -180,11 +188,9 @@ class ResultContainerTests: __slots__ = 'test_name', 'search_query', 'result_container', 'languages', 'stop_test', 'test_results' - def __init__(self, - test_results: TestResults, - test_name: str, - search_query: SearchQuery, - result_container: ResultContainer): + def __init__( + self, test_results: TestResults, test_name: str, search_query: SearchQuery, result_container: ResultContainer + ): self.test_name = test_name self.search_query = search_query self.result_container = result_container @@ -324,10 +330,9 @@ class CheckerTests: __slots__ = 'test_results', 'test_name', 'result_container_tests_list' - def __init__(self, - test_results: TestResults, - test_name: str, - result_container_tests_list: typing.List[ResultContainerTests]): + def __init__( + self, test_results: TestResults, test_name: str, result_container_tests_list: typing.List[ResultContainerTests] + ): self.test_results = test_results self.test_name = test_name self.result_container_tests_list = result_container_tests_list @@ -340,14 +345,17 @@ class CheckerTests: for i, urls_i in enumerate(urls_list): for j, urls_j in enumerate(urls_list): if i < j and urls_i == urls_j: - common, diff = _search_query_diff(self.result_container_tests_list[i].search_query, - self.result_container_tests_list[j].search_query) + common, diff = _search_query_diff( + self.result_container_tests_list[i].search_query, + self.result_container_tests_list[j].search_query, + ) common_str = ' '.join(['{}={!r}'.format(k, v) for k, v in common.items()]) - diff1_str = ', ' .join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()]) - diff2_str = ', ' .join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()]) - self.test_results.add_error(self.test_name, - 'results are identitical for {} and {} ({})' - .format(diff1_str, diff2_str, common_str)) + diff1_str = ', '.join(['{}={!r}'.format(k, v1) for (k, (v1, v2)) in diff.items()]) + diff2_str = ', '.join(['{}={!r}'.format(k, v2) for (k, (v1, v2)) in diff.items()]) + self.test_results.add_error( + self.test_name, + 'results are identitical for {} and {} ({})'.format(diff1_str, diff2_str, common_str), + ) class Checker: @@ -393,9 +401,10 @@ class Checker: elif isinstance(method, types.FunctionType): method(*args) else: - self.test_results.add_error(obj.test_name, - 'method {!r} ({}) not found for {}' - .format(method, method.__class__.__name__, obj.__class__.__name__)) + self.test_results.add_error( + obj.test_name, + 'method {!r} ({}) not found for {}'.format(method, method.__class__.__name__, obj.__class__.__name__), + ) def call_tests(self, obj, test_descriptions): for test_description in test_descriptions: diff --git a/searx/search/models.py b/searx/search/models.py index e48cb3611..ff5897966 100644 --- a/searx/search/models.py +++ b/searx/search/models.py @@ -25,19 +25,30 @@ class EngineRef: class SearchQuery: """container for all the search parameters (query, language, etc...)""" - __slots__ = 'query', 'engineref_list', 'lang', 'safesearch', 'pageno', 'time_range',\ - 'timeout_limit', 'external_bang', 'engine_data' + __slots__ = ( + 'query', + 'engineref_list', + 'lang', + 'safesearch', + 'pageno', + 'time_range', + 'timeout_limit', + 'external_bang', + 'engine_data', + ) - def __init__(self, - query: str, - engineref_list: typing.List[EngineRef], - lang: str='all', - safesearch: int=0, - pageno: int=1, - time_range: typing.Optional[str]=None, - timeout_limit: typing.Optional[float]=None, - external_bang: typing.Optional[str]=None, - engine_data: typing.Optional[typing.Dict[str, str]]=None): + def __init__( + self, + query: str, + engineref_list: typing.List[EngineRef], + lang: str = 'all', + safesearch: int = 0, + pageno: int = 1, + time_range: typing.Optional[str] = None, + timeout_limit: typing.Optional[float] = None, + external_bang: typing.Optional[str] = None, + engine_data: typing.Optional[typing.Dict[str, str]] = None, + ): self.query = query self.engineref_list = engineref_list self.lang = lang @@ -53,20 +64,39 @@ class SearchQuery: return list(set(map(lambda engineref: engineref.category, self.engineref_list))) def __repr__(self): - return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".\ - format(self.query, self.engineref_list, self.lang, self.safesearch, - self.pageno, self.time_range, self.timeout_limit, self.external_bang) + return "SearchQuery({!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r}, {!r})".format( + self.query, + self.engineref_list, + self.lang, + self.safesearch, + self.pageno, + self.time_range, + self.timeout_limit, + self.external_bang, + ) def __eq__(self, other): - return self.query == other.query\ - and self.engineref_list == other.engineref_list\ - and self.lang == other.lang\ - and self.safesearch == other.safesearch\ - and self.pageno == other.pageno\ - and self.time_range == other.time_range\ - and self.timeout_limit == other.timeout_limit\ + return ( + self.query == other.query + and self.engineref_list == other.engineref_list + and self.lang == other.lang + and self.safesearch == other.safesearch + and self.pageno == other.pageno + and self.time_range == other.time_range + and self.timeout_limit == other.timeout_limit and self.external_bang == other.external_bang + ) def __hash__(self): - return hash((self.query, tuple(self.engineref_list), self.lang, self.safesearch, self.pageno, self.time_range, - self.timeout_limit, self.external_bang)) + return hash( + ( + self.query, + tuple(self.engineref_list), + self.lang, + self.safesearch, + self.pageno, + self.time_range, + self.timeout_limit, + self.external_bang, + ) + ) diff --git a/searx/search/processors/__init__.py b/searx/search/processors/__init__.py index 8108f8dfa..966b990ec 100644 --- a/searx/search/processors/__init__.py +++ b/searx/search/processors/__init__.py @@ -29,6 +29,7 @@ logger = logger.getChild('search.processors') PROCESSORS = {} """Cache request processores, stored by *engine-name* (:py:func:`initialize`)""" + def get_processor_class(engine_type): """Return processor class according to the ``engine_type``""" for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]: diff --git a/searx/search/processors/abstract.py b/searx/search/processors/abstract.py index b5fa063fd..732b55d52 100644 --- a/searx/search/processors/abstract.py +++ b/searx/search/processors/abstract.py @@ -19,6 +19,7 @@ from searx.utils import get_engine_from_settings logger = logger.getChild('searx.search.processor') SUSPENDED_STATUS = {} + class SuspendedStatus: """Class to handle suspend state.""" @@ -39,8 +40,10 @@ class SuspendedStatus: # update continuous_errors / suspend_end_time self.continuous_errors += 1 if suspended_time is None: - suspended_time = min(settings['search']['max_ban_time_on_fail'], - self.continuous_errors * settings['search']['ban_time_on_fail']) + suspended_time = min( + settings['search']['max_ban_time_on_fail'], + self.continuous_errors * settings['search']['ban_time_on_fail'], + ) self.suspend_end_time = default_timer() + suspended_time self.suspend_reason = suspend_reason logger.debug('Suspend for %i seconds', suspended_time) @@ -127,9 +130,9 @@ class EngineProcessor(ABC): def extend_container_if_suspended(self, result_container): if self.suspended_status.is_suspended: - result_container.add_unresponsive_engine(self.engine_name, - self.suspended_status.suspend_reason, - suspended=True) + result_container.add_unresponsive_engine( + self.engine_name, self.suspended_status.suspend_reason, suspended=True + ) return True return False diff --git a/searx/search/processors/offline.py b/searx/search/processors/offline.py index ec7a4a36e..13f077cb1 100644 --- a/searx/search/processors/offline.py +++ b/searx/search/processors/offline.py @@ -23,6 +23,6 @@ class OfflineProcessor(EngineProcessor): except ValueError as e: # do not record the error self.logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e)) - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except self.handle_exception(result_container, e) self.logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e)) diff --git a/searx/search/processors/online.py b/searx/search/processors/online.py index c4ee58e11..8d8275df1 100644 --- a/searx/search/processors/online.py +++ b/searx/search/processors/online.py @@ -23,6 +23,7 @@ from .abstract import EngineProcessor def default_request_params(): """Default request parameters for ``online`` engines.""" return { + # fmt: off 'method': 'GET', 'headers': {}, 'data': {}, @@ -30,6 +31,7 @@ def default_request_params(): 'cookies': {}, 'verify': True, 'auth': None + # fmt: on } @@ -64,10 +66,7 @@ class OnlineProcessor(EngineProcessor): # create dictionary which contain all # informations about the request request_args = dict( - headers=params['headers'], - cookies=params['cookies'], - verify=params['verify'], - auth=params['auth'] + headers=params['headers'], cookies=params['cookies'], verify=params['verify'], auth=params['auth'] ) # max_redirects @@ -103,10 +102,12 @@ class OnlineProcessor(EngineProcessor): status_code = str(response.status_code or '') reason = response.reason_phrase or '' hostname = response.url.host - count_error(self.engine_name, - '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), - (status_code, reason, hostname), - secondary=True) + count_error( + self.engine_name, + '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects), + (status_code, reason, hostname), + secondary=True, + ) return response @@ -145,22 +146,16 @@ class OnlineProcessor(EngineProcessor): # requests timeout (connect or read) self.handle_exception(result_container, e, suspend=True) self.logger.error( - "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}" - .format( - default_timer() - start_time, - timeout_limit, - e.__class__.__name__ + "HTTP requests timeout (search duration : {0} s, timeout: {1} s) : {2}".format( + default_timer() - start_time, timeout_limit, e.__class__.__name__ ) ) except (httpx.HTTPError, httpx.StreamError) as e: # other requests exception self.handle_exception(result_container, e, suspend=True) self.logger.exception( - "requests exception (search duration : {0} s, timeout: {1} s) : {2}" - .format( - default_timer() - start_time, - timeout_limit, - e + "requests exception (search duration : {0} s, timeout: {1} s) : {2}".format( + default_timer() - start_time, timeout_limit, e ) ) except SearxEngineCaptchaException as e: @@ -186,10 +181,9 @@ class OnlineProcessor(EngineProcessor): if getattr(self.engine, 'paging', False): tests['paging'] = { - 'matrix': {'query': 'time', - 'pageno': (1, 2, 3)}, + 'matrix': {'query': 'time', 'pageno': (1, 2, 3)}, 'result_container': ['not_empty'], - 'test': ['unique_results'] + 'test': ['unique_results'], } if 'general' in self.engine.categories: # avoid documentation about HTML tags (

    youtubeSecond Test

    ', # noqa - result.data + result.data, ) self.assertIn( - b'

    second test content

    ', # noqa - result.data + b'

    second test content

    ', result.data # noqa ) def test_index_json(self): @@ -151,7 +149,7 @@ class ViewsTestCase(SearxTestCase): b'title,url,content,host,engine,score,type\r\n' b'First Test,http://first.test.xyz,first test content,first.test.xyz,startpage,,result\r\n' # noqa b'Second Test,http://second.test.xyz,second test content,second.test.xyz,youtube,,result\r\n', # noqa - result.data + result.data, ) def test_index_rss(self): @@ -161,30 +159,15 @@ class ViewsTestCase(SearxTestCase): def test_search_rss(self): result = self.app.post('/search', data={'q': 'test', 'format': 'rss'}) - self.assertIn( - b'Search results for "test" - searx', - result.data - ) + self.assertIn(b'Search results for "test" - searx', result.data) - self.assertIn( - b'3', - result.data - ) + self.assertIn(b'3', result.data) - self.assertIn( - b'First Test', - result.data - ) + self.assertIn(b'First Test', result.data) - self.assertIn( - b'http://first.test.xyz', - result.data - ) + self.assertIn(b'http://first.test.xyz', result.data) - self.assertIn( - b'first test content', - result.data - ) + self.assertIn(b'first test content', result.data) def test_about(self): result = self.app.get('/about') @@ -199,18 +182,9 @@ class ViewsTestCase(SearxTestCase): def test_preferences(self): result = self.app.get('/preferences') self.assertEqual(result.status_code, 200) - self.assertIn( - b'
    ', - result.data - ) - self.assertIn( - b'', - result.data - ) - self.assertIn( - b'', - result.data - ) + self.assertIn(b'', result.data) + self.assertIn(b'', result.data) + self.assertIn(b'', result.data) def test_browser_locale(self): result = self.app.get('/preferences', headers={'Accept-Language': 'zh-tw;q=0.8'}) @@ -218,30 +192,26 @@ class ViewsTestCase(SearxTestCase): self.assertIn( b'