1
0
mirror of https://github.com/searxng/searxng.git synced 2024-11-19 02:40:11 +01:00

[pylint] engines: yahoo - fix several issues reported by pylint

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2021-10-16 13:05:53 +02:00 committed by Markus Heiser
parent 858cfc0f3b
commit 38a157b56f

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: AGPL-3.0-or-later # SPDX-License-Identifier: AGPL-3.0-or-later
""" # lint: pylint
Yahoo (Web) """Yahoo (Web)
""" """
from urllib.parse import unquote, urlencode from urllib.parse import unquote, urlencode
@ -36,12 +37,17 @@ title_xpath = './/h3/a'
content_xpath = './/div[contains(@class, "compText")]' content_xpath = './/div[contains(@class, "compText")]'
suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a" suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"
time_range_dict = {'day': ['1d', 'd'], time_range_dict = {
'day': ['1d', 'd'],
'week': ['1w', 'w'], 'week': ['1w', 'w'],
'month': ['1m', 'm']} 'month': ['1m', 'm']
}
language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'}
language_aliases = {
'zh-CN': 'zh-CHS',
'zh-TW': 'zh-CHT',
'zh-HK': 'zh-CHT'
}
# remove yahoo-specific tracking-url # remove yahoo-specific tracking-url
def parse_url(url_string): def parse_url(url_string):
@ -56,22 +62,24 @@ def parse_url(url_string):
if start == 0 or len(endpositions) == 0: if start == 0 or len(endpositions) == 0:
return url_string return url_string
else:
end = min(endpositions) end = min(endpositions)
return unquote(url_string[start:end]) return unquote(url_string[start:end])
def _get_url(query, offset, language, time_range): def _get_url(query, offset, language, time_range):
if time_range in time_range_dict: if time_range in time_range_dict:
return base_url + search_url_with_time.format(offset=offset, return base_url + search_url_with_time.format(
offset = offset,
query = urlencode({'p': query}), query = urlencode({'p': query}),
lang = language, lang = language,
age = time_range_dict[time_range][0], age = time_range_dict[time_range][0],
btf=time_range_dict[time_range][1]) btf = time_range_dict[time_range][1]
return base_url + search_url.format(offset=offset, )
return base_url + search_url.format(
offset=offset,
query=urlencode({'p': query}), query=urlencode({'p': query}),
lang=language) lang=language
)
def _get_language(params): def _get_language(params):
if params['language'] == 'all': if params['language'] == 'all':
@ -95,10 +103,6 @@ def request(query, params):
params['url'] = _get_url(query, offset, language, params['time_range']) params['url'] = _get_url(query, offset, language, params['time_range'])
# TODO required?
params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
.format(lang=language)
return params return params
@ -109,10 +113,14 @@ def response(resp):
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
try: try:
results_num = int(eval_xpath(dom, '//div[@class="compPagination"]/span[last()]/text()')[0] results_num = int(
.split()[0].replace(',', '')) eval_xpath(
dom,
'//div[@class="compPagination"]/span[last()]/text()'
)[0].split()[0].replace(',', '')
)
results.append({'number_of_results': results_num}) results.append({'number_of_results': results_num})
except: except: # pylint: disable=bare-except
pass pass
# parse results # parse results
@ -120,15 +128,18 @@ def response(resp):
try: try:
url = parse_url(extract_url(eval_xpath(result, url_xpath), search_url)) url = parse_url(extract_url(eval_xpath(result, url_xpath), search_url))
title = extract_text(eval_xpath(result, title_xpath)[0]) title = extract_text(eval_xpath(result, title_xpath)[0])
except:
except: # pylint: disable=bare-except
continue continue
content = extract_text(eval_xpath(result, content_xpath)[0]) content = extract_text(eval_xpath(result, content_xpath)[0])
# append result # append result
results.append({'url': url, results.append({
'url': url,
'title': title, 'title': title,
'content': content}) 'content': content
})
# if no suggestion found, return results # if no suggestion found, return results
suggestions = eval_xpath(dom, suggestion_xpath) suggestions = eval_xpath(dom, suggestion_xpath)