1
0
mirror of https://github.com/searxng/searxng.git synced 2024-11-19 02:40:11 +01:00

[fix] startpage engine - avoid captcha

Startpage has introduced new anti-scraping measures that make SearXNG instances
run into captchas:

1. some arguments have been removed and a new `sc` argument has been added.
2. search path changed from `do/search` to `sp/search`
3. POST request is no longer needed

Closes: https://github.com/searxng/searxng/issues/692
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-01-05 13:00:52 +01:00
parent 79e0aa2645
commit f1f5e69c42

View File

@ -3,6 +3,8 @@
Startpage (Web)
"""
from urllib.parse import urlencode
from lxml import html
from dateutil import parser
from datetime import datetime, timedelta
@ -33,7 +35,7 @@ supported_languages_url = 'https://www.startpage.com/do/settings'
# search-url
base_url = 'https://startpage.com/'
search_url = base_url + 'do/search'
search_url = base_url + 'sp/search?'
# specific xpath variables
# ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
@ -46,14 +48,12 @@ content_xpath = './/p[@class="w-gl__description"]'
# do search-request
def request(query, params):
params['url'] = search_url
params['method'] = 'POST'
params['data'] = {
args = {
'query': query,
'page': params['pageno'],
'cat': 'web',
'cmd': 'process_search',
'engine0': 'v1all',
# 'abp': "-1",
'sc': 'Mj4jZy61QETj20',
}
# set language if specified
@ -61,9 +61,10 @@ def request(query, params):
lang_code = match_language(params['language'], supported_languages, fallback=None)
if lang_code:
language_name = supported_languages[lang_code]['alias']
params['data']['language'] = language_name
params['data']['lui'] = language_name
args['language'] = language_name
args['lui'] = language_name
params['url'] = search_url + urlencode(args)
return params