1
0
mirror of https://github.com/searxng/searxng.git synced 2024-11-22 20:17:45 +01:00

Merge branch 'master' of https://github.com/asciimoo/searx into boilerplate

This commit is contained in:
Markus Heiser 2019-12-03 11:38:52 +01:00
commit 1b90e1403b
24 changed files with 293 additions and 126 deletions

View File

@ -1,10 +1,17 @@
FROM alpine:3.10 FROM alpine:3.10
ENTRYPOINT ["/sbin/tini","--","/usr/local/searx/dockerfiles/docker-entrypoint.sh"]
EXPOSE 8080
VOLUME /etc/searx
VOLUME /var/log/uwsgi
ARG VERSION_GITCOMMIT=unknow ARG VERSION_GITCOMMIT=unknown
ARG SEARX_GIT_VERSION=unknow ARG SEARX_GIT_VERSION=unknown
ARG SEARX_GID=1000 ARG SEARX_GID=977
ARG SEARX_UID=1000 ARG SEARX_UID=977
RUN addgroup -g ${SEARX_GID} searx && \
adduser -u ${SEARX_UID} -D -h /usr/local/searx -s /bin/sh -G searx searx
ARG TIMESTAMP_SETTINGS=0 ARG TIMESTAMP_SETTINGS=0
ARG TIMESTAMP_UWSGI=0 ARG TIMESTAMP_UWSGI=0
@ -16,19 +23,14 @@ ENV INSTANCE_NAME=searx \
BASE_URL= \ BASE_URL= \
MORTY_KEY= \ MORTY_KEY= \
MORTY_URL= MORTY_URL=
EXPOSE 8080
VOLUME /etc/searx
VOLUME /var/log/uwsgi
WORKDIR /usr/local/searx WORKDIR /usr/local/searx
RUN addgroup -g ${SEARX_GID} searx && \
adduser -u ${SEARX_UID} -D -h /usr/local/searx -s /bin/sh -G searx searx
COPY requirements.txt ./requirements.txt COPY requirements.txt ./requirements.txt
RUN apk -U upgrade \ RUN apk upgrade --no-cache \
&& apk add -t build-dependencies \ && apk add --no-cache -t build-dependencies \
build-base \ build-base \
py3-setuptools \ py3-setuptools \
python3-dev \ python3-dev \
@ -38,7 +40,7 @@ RUN apk -U upgrade \
openssl-dev \ openssl-dev \
tar \ tar \
git \ git \
&& apk add \ && apk add --no-cache \
ca-certificates \ ca-certificates \
su-exec \ su-exec \
python3 \ python3 \
@ -50,8 +52,7 @@ RUN apk -U upgrade \
uwsgi-python3 \ uwsgi-python3 \
&& pip3 install --upgrade pip \ && pip3 install --upgrade pip \
&& pip3 install --no-cache -r requirements.txt \ && pip3 install --no-cache -r requirements.txt \
&& apk del build-dependencies \ && apk del build-dependencies
&& rm -f /var/cache/apk/*
COPY --chown=searx:searx . . COPY --chown=searx:searx . .
@ -62,7 +63,6 @@ RUN su searx -c "/usr/bin/python3 -m compileall -q searx"; \
echo "VERSION_STRING = VERSION_STRING + \"-$VERSION_GITCOMMIT\"" >> /usr/local/searx/searx/version.py; \ echo "VERSION_STRING = VERSION_STRING + \"-$VERSION_GITCOMMIT\"" >> /usr/local/searx/searx/version.py; \
fi fi
ENTRYPOINT ["/sbin/tini","--","/usr/local/searx/dockerfiles/docker-entrypoint.sh"]
# Keep this argument at the end since it changes each time # Keep this argument at the end since it changes each time
ARG LABEL_DATE= ARG LABEL_DATE=

View File

@ -18,7 +18,7 @@ from lxml import html
from searx import logger, utils from searx import logger, utils
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode from searx.url_utils import urlencode
from searx.utils import match_language, gen_useragent from searx.utils import match_language, gen_useragent, eval_xpath
logger = logger.getChild('bing engine') logger = logger.getChild('bing engine')
@ -65,11 +65,11 @@ def response(resp):
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
# parse results # parse results
for result in dom.xpath('//div[@class="sa_cc"]'): for result in eval_xpath(dom, '//div[@class="sa_cc"]'):
link = result.xpath('.//h3/a')[0] link = eval_xpath(result, './/h3/a')[0]
url = link.attrib.get('href') url = link.attrib.get('href')
title = extract_text(link) title = extract_text(link)
content = extract_text(result.xpath('.//p')) content = extract_text(eval_xpath(result, './/p'))
# append result # append result
results.append({'url': url, results.append({'url': url,
@ -77,11 +77,11 @@ def response(resp):
'content': content}) 'content': content})
# parse results again if nothing is found yet # parse results again if nothing is found yet
for result in dom.xpath('//li[@class="b_algo"]'): for result in eval_xpath(dom, '//li[@class="b_algo"]'):
link = result.xpath('.//h2/a')[0] link = eval_xpath(result, './/h2/a')[0]
url = link.attrib.get('href') url = link.attrib.get('href')
title = extract_text(link) title = extract_text(link)
content = extract_text(result.xpath('.//p')) content = extract_text(eval_xpath(result, './/p'))
# append result # append result
results.append({'url': url, results.append({'url': url,
@ -89,7 +89,7 @@ def response(resp):
'content': content}) 'content': content})
try: try:
result_len_container = "".join(dom.xpath('//span[@class="sb_count"]/text()')) result_len_container = "".join(eval_xpath(dom, '//span[@class="sb_count"]/text()'))
result_len_container = utils.to_string(result_len_container) result_len_container = utils.to_string(result_len_container)
if "-" in result_len_container: if "-" in result_len_container:
# Remove the part "from-to" for paginated request ... # Remove the part "from-to" for paginated request ...
@ -113,9 +113,9 @@ def response(resp):
def _fetch_supported_languages(resp): def _fetch_supported_languages(resp):
supported_languages = [] supported_languages = []
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
options = dom.xpath('//div[@id="limit-languages"]//input') options = eval_xpath(dom, '//div[@id="limit-languages"]//input')
for option in options: for option in options:
code = option.xpath('./@id')[0].replace('_', '-') code = eval_xpath(option, './@id')[0].replace('_', '-')
if code == 'nb': if code == 'nb':
code = 'no' code = 'no'
supported_languages.append(code) supported_languages.append(code)

View File

@ -11,7 +11,7 @@
import re import re
from lxml import html from lxml import html
from searx.utils import is_valid_lang from searx.utils import is_valid_lang, eval_xpath
from searx.url_utils import urljoin from searx.url_utils import urljoin
categories = ['general'] categories = ['general']
@ -47,14 +47,14 @@ def response(resp):
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
for k, result in enumerate(dom.xpath(results_xpath)[1:]): for k, result in enumerate(eval_xpath(dom, results_xpath)[1:]):
try: try:
from_result, to_results_raw = result.xpath('./td') from_result, to_results_raw = eval_xpath(result, './td')
except: except:
continue continue
to_results = [] to_results = []
for to_result in to_results_raw.xpath('./p/a'): for to_result in eval_xpath(to_results_raw, './p/a'):
t = to_result.text_content() t = to_result.text_content()
if t.strip(): if t.strip():
to_results.append(to_result.text_content()) to_results.append(to_result.text_content())

View File

@ -11,6 +11,7 @@
from lxml.html import fromstring from lxml.html import fromstring
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import eval_xpath
from searx.url_utils import urlencode from searx.url_utils import urlencode
# engine dependent config # engine dependent config
@ -45,16 +46,16 @@ def response(resp):
# parse results # parse results
# Quickhits # Quickhits
for r in doc.xpath('//div[@class="search_quickresult"]/ul/li'): for r in eval_xpath(doc, '//div[@class="search_quickresult"]/ul/li'):
try: try:
res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] res_url = eval_xpath(r, './/a[@class="wikilink1"]/@href')[-1]
except: except:
continue continue
if not res_url: if not res_url:
continue continue
title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))
# append result # append result
results.append({'title': title, results.append({'title': title,
@ -62,13 +63,13 @@ def response(resp):
'url': base_url + res_url}) 'url': base_url + res_url})
# Search results # Search results
for r in doc.xpath('//dl[@class="search_results"]/*'): for r in eval_xpath(doc, '//dl[@class="search_results"]/*'):
try: try:
if r.tag == "dt": if r.tag == "dt":
res_url = r.xpath('.//a[@class="wikilink1"]/@href')[-1] res_url = eval_xpath(r, './/a[@class="wikilink1"]/@href')[-1]
title = extract_text(r.xpath('.//a[@class="wikilink1"]/@title')) title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))
elif r.tag == "dd": elif r.tag == "dd":
content = extract_text(r.xpath('.')) content = extract_text(eval_xpath(r, '.'))
# append result # append result
results.append({'title': title, results.append({'title': title,

View File

@ -18,7 +18,7 @@ from json import loads
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.poolrequests import get from searx.poolrequests import get
from searx.url_utils import urlencode from searx.url_utils import urlencode
from searx.utils import match_language from searx.utils import match_language, eval_xpath
# engine dependent config # engine dependent config
categories = ['general'] categories = ['general']
@ -106,19 +106,19 @@ def response(resp):
doc = fromstring(resp.text) doc = fromstring(resp.text)
# parse results # parse results
for i, r in enumerate(doc.xpath(result_xpath)): for i, r in enumerate(eval_xpath(doc, result_xpath)):
if i >= 30: if i >= 30:
break break
try: try:
res_url = r.xpath(url_xpath)[-1] res_url = eval_xpath(r, url_xpath)[-1]
except: except:
continue continue
if not res_url: if not res_url:
continue continue
title = extract_text(r.xpath(title_xpath)) title = extract_text(eval_xpath(r, title_xpath))
content = extract_text(r.xpath(content_xpath)) content = extract_text(eval_xpath(r, content_xpath))
# append result # append result
results.append({'title': title, results.append({'title': title,

View File

@ -1,3 +1,14 @@
"""
DuckDuckGo (definitions)
- `Instant Answer API`_
- `DuckDuckGo query`_
.. _Instant Answer API: https://duckduckgo.com/api
.. _DuckDuckGo query: https://api.duckduckgo.com/?q=DuckDuckGo&format=json&pretty=1
"""
import json import json
from lxml import html from lxml import html
from re import compile from re import compile
@ -25,7 +36,8 @@ def result_to_text(url, text, htmlResult):
def request(query, params): def request(query, params):
params['url'] = url.format(query=urlencode({'q': query})) params['url'] = url.format(query=urlencode({'q': query}))
language = match_language(params['language'], supported_languages, language_aliases) language = match_language(params['language'], supported_languages, language_aliases)
params['headers']['Accept-Language'] = language.split('-')[0] language = language.split('-')[0]
params['headers']['Accept-Language'] = language
return params return params
@ -43,7 +55,8 @@ def response(resp):
# add answer if there is one # add answer if there is one
answer = search_res.get('Answer', '') answer = search_res.get('Answer', '')
if answer != '': if answer:
if search_res.get('AnswerType', '') not in ['calc']:
results.append({'answer': html_to_text(answer)}) results.append({'answer': html_to_text(answer)})
# add infobox # add infobox

View File

@ -11,6 +11,7 @@
from lxml import html, etree from lxml import html, etree
import re import re
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import eval_xpath
from searx.url_utils import quote, urljoin from searx.url_utils import quote, urljoin
from searx import logger from searx import logger
@ -52,9 +53,9 @@ def response(resp):
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
try: try:
number_of_results_string = re.sub('[^0-9]', '', dom.xpath( number_of_results_string =\
'//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()')[0] re.sub('[^0-9]', '',
) eval_xpath(dom, '//a[@class="active" and contains(@href,"/suchen/dudenonline")]/span/text()')[0])
results.append({'number_of_results': int(number_of_results_string)}) results.append({'number_of_results': int(number_of_results_string)})
@ -62,12 +63,12 @@ def response(resp):
logger.debug("Couldn't read number of results.") logger.debug("Couldn't read number of results.")
pass pass
for result in dom.xpath('//section[not(contains(@class, "essay"))]'): for result in eval_xpath(dom, '//section[not(contains(@class, "essay"))]'):
try: try:
url = result.xpath('.//h2/a')[0].get('href') url = eval_xpath(result, './/h2/a')[0].get('href')
url = urljoin(base_url, url) url = urljoin(base_url, url)
title = result.xpath('string(.//h2/a)').strip() title = eval_xpath(result, 'string(.//h2/a)').strip()
content = extract_text(result.xpath('.//p')) content = extract_text(eval_xpath(result, './/p'))
# append result # append result
results.append({'url': url, results.append({'url': url,
'title': title, 'title': title,

View File

@ -15,6 +15,7 @@ from json import loads
from time import time from time import time
from lxml.html import fromstring from lxml.html import fromstring
from searx.url_utils import urlencode from searx.url_utils import urlencode
from searx.utils import eval_xpath
# engine dependent config # engine dependent config
categories = ['general'] categories = ['general']
@ -99,9 +100,9 @@ def response(resp):
def _fetch_supported_languages(resp): def _fetch_supported_languages(resp):
supported_languages = [] supported_languages = []
dom = fromstring(resp.text) dom = fromstring(resp.text)
links = dom.xpath('//span[@id="menu2"]/a') links = eval_xpath(dom, '//span[@id="menu2"]/a')
for link in links: for link in links:
href = link.xpath('./@href')[0].split('lang%3A') href = eval_xpath(link, './@href')[0].split('lang%3A')
if len(href) == 2: if len(href) == 2:
code = href[1].split('_') code = href[1].split('_')
if len(code) == 2: if len(code) == 2:

View File

@ -14,7 +14,7 @@ from lxml import html, etree
from searx.engines.xpath import extract_text, extract_url from searx.engines.xpath import extract_text, extract_url
from searx import logger from searx import logger
from searx.url_utils import urlencode, urlparse, parse_qsl from searx.url_utils import urlencode, urlparse, parse_qsl
from searx.utils import match_language from searx.utils import match_language, eval_xpath
logger = logger.getChild('google engine') logger = logger.getChild('google engine')
@ -156,7 +156,7 @@ def parse_url(url_string, google_hostname):
# returns extract_text on the first result selected by the xpath or None # returns extract_text on the first result selected by the xpath or None
def extract_text_from_dom(result, xpath): def extract_text_from_dom(result, xpath):
r = result.xpath(xpath) r = eval_xpath(result, xpath)
if len(r) > 0: if len(r) > 0:
return extract_text(r[0]) return extract_text(r[0])
return None return None
@ -227,21 +227,21 @@ def response(resp):
# convert the text to dom # convert the text to dom
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
instant_answer = dom.xpath('//div[@id="_vBb"]//text()') instant_answer = eval_xpath(dom, '//div[@id="_vBb"]//text()')
if instant_answer: if instant_answer:
results.append({'answer': u' '.join(instant_answer)}) results.append({'answer': u' '.join(instant_answer)})
try: try:
results_num = int(dom.xpath('//div[@id="resultStats"]//text()')[0] results_num = int(eval_xpath(dom, '//div[@id="resultStats"]//text()')[0]
.split()[1].replace(',', '')) .split()[1].replace(',', ''))
results.append({'number_of_results': results_num}) results.append({'number_of_results': results_num})
except: except:
pass pass
# parse results # parse results
for result in dom.xpath(results_xpath): for result in eval_xpath(dom, results_xpath):
try: try:
title = extract_text(result.xpath(title_xpath)[0]) title = extract_text(eval_xpath(result, title_xpath)[0])
url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname) url = parse_url(extract_url(eval_xpath(result, url_xpath), google_url), google_hostname)
parsed_url = urlparse(url, google_hostname) parsed_url = urlparse(url, google_hostname)
# map result # map result
@ -250,7 +250,7 @@ def response(resp):
continue continue
# if parsed_url.path.startswith(maps_path) or parsed_url.netloc.startswith(map_hostname_start): # if parsed_url.path.startswith(maps_path) or parsed_url.netloc.startswith(map_hostname_start):
# print "yooooo"*30 # print "yooooo"*30
# x = result.xpath(map_near) # x = eval_xpath(result, map_near)
# if len(x) > 0: # if len(x) > 0:
# # map : near the location # # map : near the location
# results = results + parse_map_near(parsed_url, x, google_hostname) # results = results + parse_map_near(parsed_url, x, google_hostname)
@ -287,11 +287,11 @@ def response(resp):
continue continue
# parse suggestion # parse suggestion
for suggestion in dom.xpath(suggestion_xpath): for suggestion in eval_xpath(dom, suggestion_xpath):
# append suggestion # append suggestion
results.append({'suggestion': extract_text(suggestion)}) results.append({'suggestion': extract_text(suggestion)})
for correction in dom.xpath(spelling_suggestion_xpath): for correction in eval_xpath(dom, spelling_suggestion_xpath):
results.append({'correction': extract_text(correction)}) results.append({'correction': extract_text(correction)})
# return results # return results
@ -300,9 +300,9 @@ def response(resp):
def parse_images(result, google_hostname): def parse_images(result, google_hostname):
results = [] results = []
for image in result.xpath(images_xpath): for image in eval_xpath(result, images_xpath):
url = parse_url(extract_text(image.xpath(image_url_xpath)[0]), google_hostname) url = parse_url(extract_text(eval_xpath(image, image_url_xpath)[0]), google_hostname)
img_src = extract_text(image.xpath(image_img_src_xpath)[0]) img_src = extract_text(eval_xpath(image, image_img_src_xpath)[0])
# append result # append result
results.append({'url': url, results.append({'url': url,
@ -389,10 +389,10 @@ def attributes_to_html(attributes):
def _fetch_supported_languages(resp): def _fetch_supported_languages(resp):
supported_languages = {} supported_languages = {}
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
options = dom.xpath('//*[@id="langSec"]//input[@name="lr"]') options = eval_xpath(dom, '//*[@id="langSec"]//input[@name="lr"]')
for option in options: for option in options:
code = option.xpath('./@value')[0].split('_')[-1] code = eval_xpath(option, './@value')[0].split('_')[-1]
name = option.xpath('./@data-name')[0].title() name = eval_xpath(option, './@data-name')[0].title()
supported_languages[code] = {"name": name} supported_languages[code] = {"name": name}
return supported_languages return supported_languages

78
searx/engines/seedpeer.py Normal file
View File

@ -0,0 +1,78 @@
# Seedpeer (Videos, Music, Files)
#
# @website https://seedpeer.me
# @provide-api no (nothing found)
#
# @using-api no
# @results HTML (using search portal)
# @stable yes (HTML can change)
# @parse url, title, content, seed, leech, magnetlink
from lxml import html
from json import loads
from operator import itemgetter
from searx.url_utils import quote, urljoin
from searx.engines.xpath import extract_text
# Base URL of the site; the two templates below are built on top of it and
# response() resolves result links against it.
url = 'https://seedpeer.me/'
# Search URL template; request() fills in `search_term` and `page_no`.
search_url = url + 'search/{search_term}?page={page_no}'
# Torrent URL template; response() fills in `torrent_hash`.
torrent_file_url = url + 'torrent/{torrent_hash}'
# specific xpath variables
# <script type="text/javascript"> elements that have no `src` attribute.
script_xpath = '//script[@type="text/javascript"][not(@src)]'
# Rows (<tr>) inside the <tbody> of the second <table> in the document.
torrent_xpath = '(//table)[2]/tbody/tr'
# The following three are evaluated relative to one row.
# `href` of the link(s) in the first cell.
link_xpath = '(./td)[1]/a/@href'
# Second cell; response() uses its text as the result content.
age_xpath = '(./td)[2]'
# Third cell. NOTE(review): not referenced by request()/response() in this
# file, which read the size from the embedded JSON instead.
size_xpath = '(./td)[3]'
# do search-request
def request(query, params):
    """Store the Seedpeer search URL for ``query`` in ``params``.

    The query is percent-quoted and, together with ``params['pageno']``,
    substituted into ``search_url``.  The same ``params`` dict is returned
    with its ``'url'`` entry set.
    """
    encoded_query = quote(query)
    page_number = params['pageno']
    params['url'] = search_url.format(search_term=encoded_query, page_no=page_number)
    return params
# get response from search-request
def response(resp):
    """Parse a Seedpeer search page into a list of torrent results.

    The torrent data is read from two places in the page: a JSON object
    embedded in the text of the first element matched by ``script_xpath``
    (name, seeds, peers, size, hash) and the rows matched by
    ``torrent_xpath`` (age text and detail-page link).  JSON entries and rows
    are paired by position.

    :param resp: response object whose ``text`` attribute holds the HTML.
    :returns: list of result dicts for the ``torrent.html`` template, sorted
        by seeder count in descending order.  An empty list is returned when
        the embedded JSON cannot be located, decoded, or does not contain a
        ``data.list`` array.  Individual entries that are malformed are
        skipped instead of aborting the whole page.
    """
    dom = html.fromstring(resp.text)
    result_rows = dom.xpath(torrent_xpath)

    try:
        script_element = dom.xpath(script_xpath)[0]
        script_text = script_element.text
        # the JSON payload starts at the first opening brace of the script
        json_string = script_text[script_text.find('{'):]
        torrent_list = loads(json_string)['data']['list']
    except (IndexError, AttributeError, KeyError, TypeError, ValueError):
        # IndexError: no matching script element
        # AttributeError: script element has no text
        # ValueError: text is not valid JSON
        # KeyError/TypeError: JSON is not shaped like {'data': {'list': ...}}
        return []

    if not isinstance(torrent_list, list):
        return []

    results = []

    # parse results
    for torrent_row, torrent_json in zip(result_rows, torrent_list):
        try:
            title = torrent_json['name']
            seed = int(torrent_json['seeds'])
            leech = int(torrent_json['peers'])
            size = int(torrent_json['size'])
            torrent_hash = torrent_json['hash']

            torrentfile = torrent_file_url.format(torrent_hash=torrent_hash)
            magnetlink = 'magnet:?xt=urn:btih:{}'.format(torrent_hash)

            age = extract_text(torrent_row.xpath(age_xpath))
            link = torrent_row.xpath(link_xpath)[0]
        except (KeyError, IndexError, TypeError, ValueError):
            # a single broken entry (missing key, non-numeric count, row
            # without a link) must not discard the remaining results
            continue

        href = urljoin(url, link)

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': age,
                        'seed': seed,
                        'leech': leech,
                        'filesize': size,
                        'torrentfile': torrentfile,
                        'magnetlink': magnetlink,
                        'template': 'torrent.html'})

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)

View File

@ -51,7 +51,9 @@ def get_client_id():
if response.ok: if response.ok:
tree = html.fromstring(response.content) tree = html.fromstring(response.content)
script_tags = tree.xpath("//script[contains(@src, '/assets/app')]") # script_tags has been moved from /assets/app/ to /assets/ path. I
# found client_id in https://a-v2.sndcdn.com/assets/49-a0c01933-3.js
script_tags = tree.xpath("//script[contains(@src, '/assets/')]")
app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None] app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
# extracts valid app_js urls from soundcloud.com content # extracts valid app_js urls from soundcloud.com content

View File

@ -16,6 +16,7 @@ from datetime import datetime, timedelta
import re import re
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.languages import language_codes from searx.languages import language_codes
from searx.utils import eval_xpath
# engine dependent config # engine dependent config
categories = ['general'] categories = ['general']
@ -70,8 +71,8 @@ def response(resp):
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
# parse results # parse results
for result in dom.xpath(results_xpath): for result in eval_xpath(dom, results_xpath):
links = result.xpath(link_xpath) links = eval_xpath(result, link_xpath)
if not links: if not links:
continue continue
link = links[0] link = links[0]
@ -87,8 +88,8 @@ def response(resp):
title = extract_text(link) title = extract_text(link)
if result.xpath(content_xpath): if eval_xpath(result, content_xpath):
content = extract_text(result.xpath(content_xpath)) content = extract_text(eval_xpath(result, content_xpath))
else: else:
content = '' content = ''

View File

@ -16,7 +16,7 @@ from searx.poolrequests import get
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode from searx.url_utils import urlencode
from searx.utils import match_language from searx.utils import match_language, eval_xpath
from json import loads from json import loads
from lxml.html import fromstring from lxml.html import fromstring
@ -57,22 +57,6 @@ language_fallback_xpath = '//sup[contains(@class,"wb-language-fallback-indicator
calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]' calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'
media_xpath = value_xpath + '//div[contains(@class,"commons-media-caption")]//a' media_xpath = value_xpath + '//div[contains(@class,"commons-media-caption")]//a'
# xpath_cache
xpath_cache = {}
def get_xpath(xpath_str):
result = xpath_cache.get(xpath_str, None)
if not result:
result = etree.XPath(xpath_str)
xpath_cache[xpath_str] = result
return result
def eval_xpath(element, xpath_str):
xpath = get_xpath(xpath_str)
return xpath(element)
def get_id_cache(result): def get_id_cache(result):
id_cache = {} id_cache = {}

View File

@ -1,6 +1,6 @@
from lxml import html from lxml import html
from lxml.etree import _ElementStringResult, _ElementUnicodeResult from lxml.etree import _ElementStringResult, _ElementUnicodeResult
from searx.utils import html_to_text from searx.utils import html_to_text, eval_xpath
from searx.url_utils import unquote, urlencode, urljoin, urlparse from searx.url_utils import unquote, urlencode, urljoin, urlparse
search_url = None search_url = None
@ -104,15 +104,15 @@ def response(resp):
results = [] results = []
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
if results_xpath: if results_xpath:
for result in dom.xpath(results_xpath): for result in eval_xpath(dom, results_xpath):
url = extract_url(result.xpath(url_xpath), search_url) url = extract_url(eval_xpath(result, url_xpath), search_url)
title = extract_text(result.xpath(title_xpath)) title = extract_text(eval_xpath(result, title_xpath))
content = extract_text(result.xpath(content_xpath)) content = extract_text(eval_xpath(result, content_xpath))
tmp_result = {'url': url, 'title': title, 'content': content} tmp_result = {'url': url, 'title': title, 'content': content}
# add thumbnail if available # add thumbnail if available
if thumbnail_xpath: if thumbnail_xpath:
thumbnail_xpath_result = result.xpath(thumbnail_xpath) thumbnail_xpath_result = eval_xpath(result, thumbnail_xpath)
if len(thumbnail_xpath_result) > 0: if len(thumbnail_xpath_result) > 0:
tmp_result['img_src'] = extract_url(thumbnail_xpath_result, search_url) tmp_result['img_src'] = extract_url(thumbnail_xpath_result, search_url)
@ -120,14 +120,14 @@ def response(resp):
else: else:
for url, title, content in zip( for url, title, content in zip(
(extract_url(x, search_url) for (extract_url(x, search_url) for
x in dom.xpath(url_xpath)), x in eval_xpath(dom, url_xpath)),
map(extract_text, dom.xpath(title_xpath)), map(extract_text, eval_xpath(dom, title_xpath)),
map(extract_text, dom.xpath(content_xpath)) map(extract_text, eval_xpath(dom, content_xpath))
): ):
results.append({'url': url, 'title': title, 'content': content}) results.append({'url': url, 'title': title, 'content': content})
if not suggestion_xpath: if not suggestion_xpath:
return results return results
for suggestion in dom.xpath(suggestion_xpath): for suggestion in eval_xpath(dom, suggestion_xpath):
results.append({'suggestion': extract_text(suggestion)}) results.append({'suggestion': extract_text(suggestion)})
return results return results

View File

@ -14,7 +14,7 @@
from lxml import html from lxml import html
from searx.engines.xpath import extract_text, extract_url from searx.engines.xpath import extract_text, extract_url
from searx.url_utils import unquote, urlencode from searx.url_utils import unquote, urlencode
from searx.utils import match_language from searx.utils import match_language, eval_xpath
# engine dependent config # engine dependent config
categories = ['general'] categories = ['general']
@ -109,21 +109,21 @@ def response(resp):
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
try: try:
results_num = int(dom.xpath('//div[@class="compPagination"]/span[last()]/text()')[0] results_num = int(eval_xpath(dom, '//div[@class="compPagination"]/span[last()]/text()')[0]
.split()[0].replace(',', '')) .split()[0].replace(',', ''))
results.append({'number_of_results': results_num}) results.append({'number_of_results': results_num})
except: except:
pass pass
# parse results # parse results
for result in dom.xpath(results_xpath): for result in eval_xpath(dom, results_xpath):
try: try:
url = parse_url(extract_url(result.xpath(url_xpath), search_url)) url = parse_url(extract_url(eval_xpath(result, url_xpath), search_url))
title = extract_text(result.xpath(title_xpath)[0]) title = extract_text(eval_xpath(result, title_xpath)[0])
except: except:
continue continue
content = extract_text(result.xpath(content_xpath)[0]) content = extract_text(eval_xpath(result, content_xpath)[0])
# append result # append result
results.append({'url': url, results.append({'url': url,
@ -131,7 +131,7 @@ def response(resp):
'content': content}) 'content': content})
# if no suggestion found, return results # if no suggestion found, return results
suggestions = dom.xpath(suggestion_xpath) suggestions = eval_xpath(dom, suggestion_xpath)
if not suggestions: if not suggestions:
return results return results
@ -148,9 +148,9 @@ def response(resp):
def _fetch_supported_languages(resp): def _fetch_supported_languages(resp):
supported_languages = [] supported_languages = []
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
options = dom.xpath('//div[@id="yschlang"]/span/label/input') options = eval_xpath(dom, '//div[@id="yschlang"]/span/label/input')
for option in options: for option in options:
code_parts = option.xpath('./@value')[0][5:].split('_') code_parts = eval_xpath(option, './@value')[0][5:].split('_')
if len(code_parts) == 2: if len(code_parts) == 2:
code = code_parts[0] + '-' + code_parts[1].upper() code = code_parts[0] + '-' + code_parts[1].upper()
else: else:

View File

@ -67,8 +67,9 @@ def merge_two_infoboxes(infobox1, infobox2):
for url2 in infobox2.get('urls', []): for url2 in infobox2.get('urls', []):
unique_url = True unique_url = True
for url1 in infobox1.get('urls', []): parsed_url2 = urlparse(url2.get('url', ''))
if compare_urls(urlparse(url1.get('url', '')), urlparse(url2.get('url', ''))): for url1 in urls1:
if compare_urls(urlparse(url1.get('url', '')), parsed_url2):
unique_url = False unique_url = False
break break
if unique_url: if unique_url:
@ -188,8 +189,9 @@ class ResultContainer(object):
add_infobox = True add_infobox = True
infobox_id = infobox.get('id', None) infobox_id = infobox.get('id', None)
if infobox_id is not None: if infobox_id is not None:
parsed_url_infobox_id = urlparse(infobox_id)
for existingIndex in self.infoboxes: for existingIndex in self.infoboxes:
if compare_urls(urlparse(existingIndex.get('id', '')), urlparse(infobox_id)): if compare_urls(urlparse(existingIndex.get('id', '')), parsed_url_infobox_id):
merge_two_infoboxes(existingIndex, infobox) merge_two_infoboxes(existingIndex, infobox)
add_infobox = False add_infobox = False

View File

@ -748,6 +748,11 @@ engines:
page_size : 10 page_size : 10
disabled : True disabled : True
- name : seedpeer
shortcut : speu
engine : seedpeer
categories: files, music, videos
# - name : yacy # - name : yacy
# engine : yacy # engine : yacy
# shortcut : ya # shortcut : ya

View File

@ -4,7 +4,7 @@
{% endif %} {% endif %}
<h3 class="result_title"><a href="{{ result.url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ result.title|safe }}</a></h3> <h3 class="result_title"><a href="{{ result.url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ result.title|safe }}</a></h3>
{% if result.content %}<span class="content">{{ result.content|safe }}</span><br />{% endif %} {% if result.content %}<span class="content">{{ result.content|safe }}</span><br />{% endif %}
{% if result.seed %}<span class="stats">{{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}</span><br />{% endif %} {% if result.seed is defined %}<span class="stats">{{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}</span><br />{% endif %}
<span> <span>
{% if result.magnetlink %}<a href="{{ result.magnetlink }}" class="magnetlink">{{ _('magnet link') }}</a>{% endif %} {% if result.magnetlink %}<a href="{{ result.magnetlink }}" class="magnetlink">{{ _('magnet link') }}</a>{% endif %}
{% if result.torrentfile %}<a href="{{ result.torrentfile }}" class="torrentfile" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('torrent file') }}</a>{% endif %} {% if result.torrentfile %}<a href="{{ result.torrentfile }}" class="torrentfile" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('torrent file') }}</a>{% endif %}

View File

@ -8,6 +8,6 @@
<p> <p>
{% if result.magnetlink %}<a href="{{ result.magnetlink }}" class="magnetlink">{{ _('magnet link') }}</a>{% endif %} {% if result.magnetlink %}<a href="{{ result.magnetlink }}" class="magnetlink">{{ _('magnet link') }}</a>{% endif %}
{% if result.torrentfile %}<a href="{{ result.torrentfile }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %} class="torrentfile">{{ _('torrent file') }}</a>{% endif %} - {% if result.torrentfile %}<a href="{{ result.torrentfile }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %} class="torrentfile">{{ _('torrent file') }}</a>{% endif %} -
{% if result.seed %}<span class="stats">{{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}</span>{% endif %} {% if result.seed is defined %}<span class="stats">{{ _('Seeder') }} : {{ result.seed }}, {{ _('Leecher') }} : {{ result.leech }}</span>{% endif %}
</p> </p>
</div> </div>

View File

@ -3,7 +3,7 @@
{{ result_header(result, favicons) }} {{ result_header(result, favicons) }}
{{ result_sub_header(result) }} {{ result_sub_header(result) }}
{% if result.seed %}<p class="result-content">{{ icon('transfer') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> &bull; {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span>{% endif %} {% if result.seed is defined %}<p class="result-content">{{ icon('transfer') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> &bull; {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span>{% endif %}
{% if result.filesize %}<br />{{ icon('floppy-disk') }} {{ _('Filesize') }} {% if result.filesize %}<br />{{ icon('floppy-disk') }} {{ _('Filesize') }}
<span class="badge"> <span class="badge">
{% if result.filesize < 1024 %}{{ result.filesize }} {{ _('Bytes') }} {% if result.filesize < 1024 %}{{ result.filesize }} {{ _('Bytes') }}

View File

@ -6,7 +6,7 @@
{% if result.magnetlink %}<p class="altlink"> &bull; {{ result_link(result.magnetlink, icon('magnet') + _('magnet link'), "magnetlink") }}</p>{% endif %} {% if result.magnetlink %}<p class="altlink"> &bull; {{ result_link(result.magnetlink, icon('magnet') + _('magnet link'), "magnetlink") }}</p>{% endif %}
{% if result.torrentfile %}<p class="altlink"> &bull; {{ result_link(result.torrentfile, icon('download-alt') + _('torrent file'), "torrentfile") }}</p>{% endif %} {% if result.torrentfile %}<p class="altlink"> &bull; {{ result_link(result.torrentfile, icon('download-alt') + _('torrent file'), "torrentfile") }}</p>{% endif %}
{% if result.seed %}<p class="stat"> &bull; {{ icon('arrow-swap') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> &bull; {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span></p>{% endif %} {% if result.seed is defined %}<p class="stat"> &bull; {{ icon('arrow-swap') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> &bull; {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span></p>{% endif %}
{%- if result.filesize %}<p class="stat">{{ icon('floppy-disk') }} {{ _('Filesize') }}<span class="badge"> {%- if result.filesize %}<p class="stat">{{ icon('floppy-disk') }} {{ _('Filesize') }}<span class="badge">
{%- if result.filesize < 1024 %}{{ result.filesize }} {{ _('Bytes') }} {%- if result.filesize < 1024 %}{{ result.filesize }} {{ _('Bytes') }}

View File

@ -13,6 +13,7 @@ from numbers import Number
from os.path import splitext, join from os.path import splitext, join
from io import open from io import open
from random import choice from random import choice
from lxml.etree import XPath
import sys import sys
import json import json
@ -51,6 +52,7 @@ ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)
useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__)) useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__))
+ "/data/useragents.json", 'r', encoding='utf-8').read()) + "/data/useragents.json", 'r', encoding='utf-8').read())
xpath_cache = dict()
lang_to_lc_cache = dict() lang_to_lc_cache = dict()
@ -450,3 +452,16 @@ def get_engine_from_settings(name):
return engine return engine
return {} return {}
def get_xpath(xpath_str):
    """Return the compiled ``XPath`` object for ``xpath_str``.

    Compiled expressions are kept in the module-level ``xpath_cache`` dict,
    keyed by the expression string, and reused on later calls.
    """
    compiled = xpath_cache.get(xpath_str)
    if compiled is not None:
        return compiled

    # First time this expression is seen: compile it and remember it.
    compiled = XPath(xpath_str)
    xpath_cache[xpath_str] = compiled
    return compiled
def eval_xpath(element, xpath_str):
    """Evaluate the XPath expression ``xpath_str`` against ``element``.

    The expression object is obtained through ``get_xpath`` and called with
    ``element``; the result of that call is returned unchanged.
    """
    return get_xpath(xpath_str)(element)

View File

@ -157,20 +157,18 @@ outgoing_proxies = settings['outgoing'].get('proxies') or None
@babel.localeselector @babel.localeselector
def get_locale(): def get_locale():
locale = request.accept_languages.best_match(settings['locales'].keys()) if 'locale' in request.form\
and request.form['locale'] in settings['locales']:
if request.preferences.get_value('locale') != '': return request.form['locale']
locale = request.preferences.get_value('locale')
if 'locale' in request.args\ if 'locale' in request.args\
and request.args['locale'] in settings['locales']: and request.args['locale'] in settings['locales']:
locale = request.args['locale'] return request.args['locale']
if 'locale' in request.form\ if request.preferences.get_value('locale') != '':
and request.form['locale'] in settings['locales']: return request.preferences.get_value('locale')
locale = request.form['locale']
return locale return request.accept_languages.best_match(settings['locales'].keys())
# code-highlighter # code-highlighter

View File

@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
from searx.engines import seedpeer
from searx.testing import SearxTestCase
class TestBtdiggEngine(SearxTestCase):
    """Unit tests for the ``seedpeer`` engine module."""

    # NOTE(review): the class name mentions Btdigg, but every test below
    # exercises ``seedpeer`` -- looks like a copy-paste leftover.

    def test_request(self):
        """request() must build a seedpeer URL that contains the query."""
        search_terms = 'test_query'
        request_params = defaultdict(dict)
        request_params['pageno'] = 1

        built = seedpeer.request(search_terms, request_params)

        self.assertIn('url', built)
        built_url = built['url']
        self.assertIn(search_terms, built_url)
        self.assertIn('seedpeer', built_url)

    def test_response(self):
        """response() must reject non-response objects and parse one result."""
        # Objects that are not response-like are expected to raise
        # AttributeError.
        for not_a_response in (None, [], '', '[]'):
            self.assertRaises(AttributeError, seedpeer.response, not_a_response)

        # A page without any result yields an empty list.
        empty_page = mock.Mock(text='<html></html>')
        self.assertEqual(seedpeer.response(empty_page), [])

        # A page carrying exactly one torrent entry.
        html = u"""
<html>
<head>
<script></script>
<script type="text/javascript" src="not_here.js"></script>
<script type="text/javascript">
window.initialData=
{"data": {"list": [{"name": "Title", "seeds": "10", "peers": "20", "size": "1024", "hash": "abc123"}]}}
</script>
</head>
<body>
<table></table>
<table>
<thead><tr></tr></thead>
<tbody>
<tr>
<td><a href="link">Title</a></td>
<td>1 year</td>
<td>1 KB</td>
<td>10</td>
<td>20</td>
<td></td>
</tr>
</tbody>
</table>
</body>
</html>
"""
        single_result_page = mock.Mock(text=html)
        results = seedpeer.response(single_result_page)

        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)

        self.assertEqual(results[0]['title'], 'Title')
        self.assertEqual(results[0]['url'], 'https://seedpeer.me/link')
        self.assertEqual(results[0]['seed'], 10)
        self.assertEqual(results[0]['leech'], 20)
        self.assertEqual(results[0]['filesize'], 1024)
        self.assertEqual(results[0]['torrentfile'], 'https://seedpeer.me/torrent/abc123')
        self.assertEqual(results[0]['magnetlink'], 'magnet:?xt=urn:btih:abc123')