1
0
mirror of https://github.com/searxng/searxng.git synced 2024-11-22 12:10:11 +01:00

[enh] py3 compatibility

This commit is contained in:
Adam Tauber 2016-11-30 18:43:03 +01:00
parent 46a2c63f8e
commit 52e615dede
115 changed files with 517 additions and 513 deletions

View File

@ -9,6 +9,7 @@ addons:
language: python
python:
- "2.7"
- "3.6"
before_install:
- "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start"
@ -24,9 +25,9 @@ script:
- ./manage.sh styles
- ./manage.sh grunt_build
- ./manage.sh tests
- ./manage.sh py_test_coverage
after_success:
coveralls
- ./manage.sh py_test_coverage
- coveralls
notifications:
irc:
channels:

View File

@ -3,8 +3,7 @@ mock==2.0.0
nose2[coverage-plugin]
pep8==1.7.0
plone.testing==5.0.0
robotframework-selenium2library==1.8.0
robotsuite==1.7.0
splinter==0.7.5
transifex-client==0.12.2
unittest2==1.1.0
zope.testrunner==4.5.1

View File

@ -1,8 +1,12 @@
from os import listdir
from os.path import realpath, dirname, join, isdir
from sys import version_info
from searx.utils import load_module
from collections import defaultdict
if version_info[0] == 3:
unicode = str
answerers_dir = dirname(realpath(__file__))
@ -10,7 +14,7 @@ answerers_dir = dirname(realpath(__file__))
def load_answerers():
answerers = []
for filename in listdir(answerers_dir):
if not isdir(join(answerers_dir, filename)):
if not isdir(join(answerers_dir, filename)) or filename.startswith('_'):
continue
module = load_module('answerer.py', join(answerers_dir, filename))
if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
@ -30,12 +34,12 @@ def get_answerers_by_keywords(answerers):
def ask(query):
results = []
query_parts = filter(None, query.query.split())
query_parts = list(filter(None, query.query.split()))
if query_parts[0] not in answerers_by_keywords:
if query_parts[0].decode('utf-8') not in answerers_by_keywords:
return results
for answerer in answerers_by_keywords[query_parts[0]]:
for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
result = answerer(query)
if result:
results.append(result)

View File

@ -1,5 +1,6 @@
import random
import string
import sys
from flask_babel import gettext
# required answerer attribute
@ -8,7 +9,11 @@ keywords = ('random',)
random_int_max = 2**31
random_string_letters = string.lowercase + string.digits + string.uppercase
if sys.version_info[0] == 2:
random_string_letters = string.lowercase + string.digits + string.uppercase
else:
unicode = str
random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
def random_string():
@ -24,9 +29,9 @@ def random_int():
return unicode(random.randint(-random_int_max, random_int_max))
random_types = {u'string': random_string,
u'int': random_int,
u'float': random_float}
random_types = {b'string': random_string,
b'int': random_int,
b'float': random_float}
# required answerer function

View File

@ -1,8 +1,12 @@
from sys import version_info
from functools import reduce
from operator import mul
from flask_babel import gettext
if version_info[0] == 3:
unicode = str
keywords = ('min',
'max',
'avg',
@ -19,22 +23,22 @@ def answer(query):
return []
try:
args = map(float, parts[1:])
args = list(map(float, parts[1:]))
except:
return []
func = parts[0]
answer = None
if func == 'min':
if func == b'min':
answer = min(args)
elif func == 'max':
elif func == b'max':
answer = max(args)
elif func == 'avg':
elif func == b'avg':
answer = sum(args) / len(args)
elif func == 'sum':
elif func == b'sum':
answer = sum(args)
elif func == 'prod':
elif func == b'prod':
answer = reduce(mul, args, 1)
if answer is None:

View File

@ -18,7 +18,6 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
from lxml import etree
from json import loads
from urllib import urlencode
from searx import settings
from searx.languages import language_codes
from searx.engines import (
@ -26,6 +25,11 @@ from searx.engines import (
)
from searx.poolrequests import get as http_get
try:
from urllib import urlencode
except:
from urllib.parse import urlencode
def get(*args, **kwargs):
if 'timeout' not in kwargs:

View File

@ -1,8 +1,7 @@
from urllib import quote
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
from urlparse import urljoin
from searx.url_utils import quote, urljoin
url = 'https://1337x.to/'
search_url = url + 'search/{search_term}/{pageno}/'

View File

@ -72,12 +72,11 @@ def load_engine(engine_data):
if engine_data['categories'] == 'none':
engine.categories = []
else:
engine.categories = map(
str.strip, engine_data['categories'].split(','))
engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
continue
setattr(engine, param_name, engine_data[param_name])
for arg_name, arg_value in engine_default_args.iteritems():
for arg_name, arg_value in engine_default_args.items():
if not hasattr(engine, arg_name):
setattr(engine, arg_name, arg_value)

View File

@ -11,10 +11,9 @@
@parse url, title
"""
from urlparse import urljoin
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['it']

View File

@ -14,10 +14,10 @@
"""
from lxml import etree
from urllib import urlencode
from searx.utils import searx_useragent
from datetime import datetime
import re
from searx.url_utils import urlencode
from searx.utils import searx_useragent
categories = ['science']
@ -73,7 +73,7 @@ def request(query, params):
def response(resp):
results = []
search_results = etree.XML(resp.content)
search_results = etree.XML(resp.text)
for entry in search_results.xpath('./result/doc'):
content = "No description available"

View File

@ -13,9 +13,9 @@
@todo publishedDate
"""
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config
categories = ['general']

View File

@ -15,11 +15,11 @@
limited response to 10 images
"""
from urllib import urlencode
from lxml import html
from json import loads
import re
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
# engine dependent config
categories = ['images']

View File

@ -11,13 +11,12 @@
@parse url, title, content, publishedDate, thumbnail
"""
from urllib import urlencode
from urlparse import urlparse, parse_qsl
from datetime import datetime
from dateutil import parser
from lxml import etree
from searx.utils import list_get
from searx.engines.bing import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode, urlparse, parse_qsl
# engine dependent config
categories = ['news']
@ -86,7 +85,7 @@ def request(query, params):
def response(resp):
results = []
rss = etree.fromstring(resp.content)
rss = etree.fromstring(resp.text)
ns = rss.nsmap

View File

@ -11,7 +11,7 @@
"""
from json import loads
from urllib import urlencode
from searx.url_utils import urlencode
# engine dependent config
categories = ['images']

View File

@ -10,11 +10,10 @@
@parse url, title, content, seed, leech, magnetlink
"""
from urlparse import urljoin
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
from searx.url_utils import quote, urljoin
from searx.utils import get_torrent_size
# engine dependent config
@ -38,7 +37,7 @@ def request(query, params):
def response(resp):
results = []
dom = html.fromstring(resp.content)
dom = html.fromstring(resp.text)
search_res = dom.xpath('//div[@id="search_res"]/table/tr')

View File

@ -1,21 +1,25 @@
from datetime import datetime
import json
import re
import os
import json
import sys
import unicodedata
from datetime import datetime
if sys.version_info[0] == 3:
unicode = str
categories = []
url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
weight = 100
parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) # noqa
parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
db = 1
def normalize_name(name):
name = name.lower().replace('-', ' ').rstrip('s')
name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
name = re.sub(' +', ' ', name)
return unicodedata.normalize('NFKD', name).lower()
@ -35,7 +39,7 @@ def iso4217_to_name(iso4217, language):
def request(query, params):
m = parser_re.match(unicode(query, 'utf8'))
m = parser_re.match(query)
if not m:
# wrong query
return params

View File

@ -12,10 +12,9 @@
@todo set content-parameter with correct data
"""
from urllib import urlencode
from json import loads
from datetime import datetime
from requests import get
from searx.url_utils import urlencode
# engine dependent config
categories = ['videos']

View File

@ -11,7 +11,7 @@
"""
from json import loads
from urllib import urlencode
from searx.url_utils import urlencode
# engine dependent config
categories = ['music']
@ -30,8 +30,7 @@ embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true"
def request(query, params):
offset = (params['pageno'] - 1) * 25
params['url'] = search_url.format(query=urlencode({'q': query}),
offset=offset)
params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
return params

View File

@ -12,10 +12,10 @@
@todo rewrite to api
"""
from urllib import urlencode
from lxml import html
import re
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config
categories = ['images']

View File

@ -10,20 +10,20 @@
"""
import re
from urlparse import urljoin
from lxml import html
from searx.utils import is_valid_lang
from searx.url_utils import urljoin
categories = ['general']
url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100
parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
results_xpath = './/table[@id="r"]/tr'
def request(query, params):
m = parser_re.match(unicode(query, 'utf8'))
m = parser_re.match(query)
if not m:
return params

View File

@ -10,10 +10,14 @@
@parse url, title, content, magnetlink
"""
from urlparse import urljoin
from sys import version_info
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
from searx.url_utils import urljoin
if version_info[0] == 3:
unicode = str
categories = ['videos', 'music', 'files']
paging = True
@ -31,7 +35,7 @@ def request(query, params):
def response(resp):
dom = html.fromstring(resp.content)
dom = html.fromstring(resp.text)
search_res = dom.xpath('.//td[@class="x-item"]')
if not search_res:

View File

@ -10,10 +10,10 @@
@parse url, title, content, publishedDate, thumbnail
"""
from urllib import quote_plus
from dateutil import parser
from json import loads
from lxml import html
from dateutil import parser
from searx.url_utils import quote_plus
# engine dependent config
categories = ['news', 'social media']

View File

@ -9,9 +9,9 @@
# @stable yes
# @parse (general) url, title, content
from urllib import urlencode
from lxml.html import fromstring
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config
categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'

View File

@ -13,11 +13,11 @@
@todo rewrite to api
"""
from urllib import urlencode
from lxml.html import fromstring
from requests import get
from json import loads
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config
categories = ['general']

View File

@ -1,10 +1,10 @@
import json
from urllib import urlencode
from re import compile, sub
from lxml import html
from searx.utils import html_to_text
from re import compile
from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
from searx.utils import html_to_text
url = 'https://api.duckduckgo.com/'\
+ '?{query}&format=json&pretty=0&no_redirect=1&d=1'

View File

@ -10,10 +10,10 @@
@parse url, title, content, publishedDate, img_src
"""
from urllib import urlencode
from json import loads
import datetime
from searx.utils import searx_useragent
from searx.url_utils import urlencode
# engine dependent config
categories = ['general', 'news']

View File

@ -9,9 +9,9 @@
@parse url, title, content
"""
from urllib import urlencode
from searx.engines.xpath import extract_text
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config
categories = ['files']
@ -24,8 +24,7 @@ search_url = base_url + 'repository/browse/?{query}'
# do search-request
def request(query, params):
query = urlencode({'fdfilter': query,
'fdpage': params['pageno']})
query = urlencode({'fdfilter': query, 'fdpage': params['pageno']})
params['url'] = search_url.format(query=query)
return params

View File

@ -1,5 +1,9 @@
from urllib import urlencode
from HTMLParser import HTMLParser
from searx.url_utils import urlencode
try:
from HTMLParser import HTMLParser
except:
from html.parser import HTMLParser
url = 'http://www.filecrop.com/'
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa
@ -73,8 +77,7 @@ class FilecropResultParser(HTMLParser):
def request(query, params):
index = 1 + (params['pageno'] - 1) * 30
params['url'] = search_url.format(query=urlencode({'w': query}),
index=index)
params['url'] = search_url.format(query=urlencode({'w': query}), index=index)
return params

View File

@ -13,8 +13,8 @@
More info on api-key : https://www.flickr.com/services/apps/create/
"""
from urllib import urlencode
from json import loads
from searx.url_utils import urlencode
categories = ['images']

View File

@ -12,11 +12,11 @@
@parse url, title, thumbnail, img_src
"""
from urllib import urlencode
from json import loads
from time import time
import re
from searx.engines import logger
from searx.url_utils import urlencode
logger = logger.getChild('flickr-noapi')

View File

@ -10,12 +10,10 @@
@parse url, title, content, thumbnail, img_src
"""
from urlparse import urljoin
from cgi import escape
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
from dateutil import parser
from searx.url_utils import urljoin, urlencode
# engine dependent config
categories = ['it']

View File

@ -10,7 +10,7 @@ Frinkiac (Images)
"""
from json import loads
from urllib import urlencode
from searx.url_utils import urlencode
categories = ['images']

View File

@ -11,10 +11,9 @@
"""
from json import loads
from random import randint
from time import time
from urllib import urlencode
from lxml.html import fromstring
from searx.url_utils import urlencode
# engine dependent config
categories = ['general']

View File

@ -10,8 +10,8 @@
@parse url, title, content
"""
from urllib import urlencode
from json import loads
from searx.url_utils import urlencode
# engine dependent config
categories = ['it']

View File

@ -9,11 +9,10 @@
# @parse url, title, content, suggestion
import re
from urllib import urlencode
from urlparse import urlparse, parse_qsl
from lxml import html, etree
from searx.engines.xpath import extract_text, extract_url
from searx.search import logger
from searx import logger
from searx.url_utils import urlencode, urlparse, parse_qsl
logger = logger.getChild('google engine')

View File

@ -11,9 +11,9 @@
"""
from datetime import date, timedelta
from urllib import urlencode
from json import loads
from lxml import html
from searx.url_utils import urlencode
# engine dependent config

View File

@ -11,9 +11,8 @@
"""
from lxml import html
from urllib import urlencode
from json import loads
from searx.engines.google import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
# search-url
categories = ['news']

View File

@ -12,11 +12,15 @@
# @todo embedded (needs some md5 from video page)
from json import loads
from urllib import urlencode
from lxml import html
from HTMLParser import HTMLParser
from searx.engines.xpath import extract_text
from dateutil import parser
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
try:
from HTMLParser import HTMLParser
except:
from html.parser import HTMLParser
# engine dependent config
categories = ['videos']

View File

@ -1,11 +1,16 @@
from urllib import urlencode
from json import loads
from collections import Iterable
from json import loads
from sys import version_info
from searx.url_utils import urlencode
if version_info[0] == 3:
unicode = str
search_url = None
url_query = None
content_query = None
title_query = None
paging = False
suggestion_query = ''
results_query = ''
@ -20,7 +25,7 @@ first_page_num = 1
def iterate(iterable):
if type(iterable) == dict:
it = iterable.iteritems()
it = iterable.items()
else:
it = enumerate(iterable)

View File

@ -10,12 +10,11 @@
@parse url, title, content, seed, leech, magnetlink
"""
from urlparse import urljoin
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size, convert_str_to_int
from searx.url_utils import quote, urljoin
# engine dependent config
categories = ['videos', 'music', 'files']

View File

@ -14,7 +14,7 @@
from json import loads
from string import Formatter
from urllib import urlencode, quote
from searx.url_utils import urlencode, quote
# engine dependent config
categories = ['general']

View File

@ -11,8 +11,8 @@
"""
from json import loads
from urllib import urlencode
from dateutil import parser
from searx.url_utils import urlencode
# engine dependent config
categories = ['music']

View File

@ -9,9 +9,9 @@
@parse url, title, content, seed, leech, torrentfile
"""
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config
categories = ['files', 'images', 'videos', 'music']

View File

@ -11,7 +11,6 @@
"""
from json import loads
from searx.utils import searx_useragent
# engine dependent config
categories = ['map']
@ -27,9 +26,6 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
def request(query, params):
params['url'] = base_url + search_string.format(query=query)
# using searx User-Agent
params['headers']['User-Agent'] = searx_useragent()
return params

View File

@ -10,9 +10,9 @@
@parse url, title
"""
from urllib import urlencode
from json import loads
from searx.utils import searx_useragent
from searx.url_utils import urlencode
# engine dependent config
categories = ['map']

View File

@ -8,11 +8,10 @@
# @stable yes (HTML can change)
# @parse url, title, content, seed, leech, magnetlink
from urlparse import urljoin
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
from searx.url_utils import quote, urljoin
# engine dependent config
categories = ['videos', 'music', 'files']

View File

@ -12,9 +12,8 @@
from datetime import datetime
from json import loads
from urllib import urlencode
from searx.utils import html_to_text
from searx.url_utils import urlencode
# engine dependent config
categories = None

View File

@ -11,9 +11,8 @@
"""
import json
from urllib import urlencode
from urlparse import urlparse, urljoin
from datetime import datetime
from searx.url_utils import urlencode, urljoin, urlparse
# engine dependent config
categories = ['general', 'images', 'news', 'social media']
@ -26,8 +25,7 @@ search_url = base_url + 'search.json?{query}'
# do search-request
def request(query, params):
query = urlencode({'q': query,
'limit': page_size})
query = urlencode({'q': query, 'limit': page_size})
params['url'] = search_url.format(query=query)
return params

View File

@ -10,9 +10,7 @@
@parse url, title, content, img_src
"""
from urllib import urlencode
from json import loads, dumps
from dateutil import parser
from searx.utils import html_to_text
# engine dependent config
@ -48,7 +46,7 @@ def response(resp):
search_res = loads(resp.text)
# return empty array if there are no results
if search_res.get('total') < 1:
if search_res.get('total', 0) < 1:
return []
# parse results

View File

@ -10,8 +10,8 @@
@parse url, title, content
"""
from urllib import urlencode
from json import loads
from searx.url_utils import urlencode
# engine dependent config
@ -31,8 +31,7 @@ code_endings = {'cs': 'c#',
# do search-request
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}),
pageno=params['pageno'] - 1)
params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
return params

View File

@ -10,8 +10,8 @@
@parse url, title, content
"""
from urllib import urlencode
from json import loads
from searx.url_utils import urlencode
# engine dependent config
categories = ['it']
@ -24,8 +24,7 @@ search_url = url + 'api/search_IV/?{query}&p={pageno}'
# do search-request
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}),
pageno=params['pageno'] - 1)
params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
return params

View File

@ -8,11 +8,9 @@
# @stable yes (HTML can change)
# @parse url, title, content, seed, leech, magnetlink
from urlparse import urljoin
from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
from searx.url_utils import quote, urljoin
url = 'http://www.seedpeer.eu/'

View File

@ -11,13 +11,17 @@
"""
import re
from StringIO import StringIO
from json import loads
from lxml import etree
from urllib import urlencode, quote_plus
from lxml import html
from dateutil import parser
from searx import logger
from searx.poolrequests import get as http_get
from searx.url_utils import quote_plus, urlencode
try:
from cStringIO import StringIO
except:
from io import StringIO
# engine dependent config
categories = ['music']
@ -36,14 +40,15 @@ embedded_url = '<iframe width="100%" height="166" ' +\
'scrolling="no" frameborder="no" ' +\
'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
def get_client_id():
response = http_get("https://soundcloud.com")
rx_namespace = {"re": "http://exslt.org/regular-expressions"}
if response.ok:
tree = etree.parse(StringIO(response.content), etree.HTMLParser())
script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace)
tree = html.fromstring(response.content)
script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
# extracts valid app_js urls from soundcloud.com content
@ -51,7 +56,7 @@ def get_client_id():
# gets app_js and searches for the clientid
response = http_get(app_js_url)
if response.ok:
cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I)
cids = cid_re.search(response.text)
if cids is not None and len(cids.groups()):
return cids.groups()[0]
logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")

View File

@ -11,7 +11,7 @@
"""
from json import loads
from urllib import urlencode
from searx.url_utils import urlencode
# engine dependent config
categories = ['music']
@ -29,8 +29,7 @@ embedded_url = '<iframe data-src="https://embed.spotify.com/?uri=spotify:track:{
def request(query, params):
offset = (params['pageno'] - 1) * 20
params['url'] = search_url.format(query=urlencode({'q': query}),
offset=offset)
params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
return params

View File

@ -10,10 +10,9 @@
@parse url, title, content
"""
from urlparse import urljoin
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['it']
@ -31,8 +30,7 @@ content_xpath = './/div[@class="excerpt"]'
# do search-request
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}),
pageno=params['pageno'])
params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
return params

View File

@ -56,7 +56,7 @@ def request(query, params):
def response(resp):
results = []
dom = html.fromstring(resp.content)
dom = html.fromstring(resp.text)
# parse results
for result in dom.xpath(results_xpath):

View File

@ -10,10 +10,10 @@
@parse url, title, content
"""
from urllib import quote_plus
from lxml import html
from searx.languages import language_codes
from searx.engines.xpath import extract_text
from searx.url_utils import quote_plus
# engine dependent config
categories = ['videos']

View File

@ -11,9 +11,9 @@
"""
from json import loads
from urllib import urlencode, unquote
import re
from lxml.html import fromstring
from searx.url_utils import unquote, urlencode
# engine dependent config
categories = ['general', 'images']
@ -27,10 +27,10 @@ search_string = '?{query}&page={page}'
supported_languages_url = base_url
# regex
regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
regex_json_remove_start = re.compile(r'^initialData:\s*')
regex_json_remove_end = re.compile(r',\s*environment$')
regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')
regex_json = re.compile(b'initialData: {"Request":(.|\n)*},\s*environment')
regex_json_remove_start = re.compile(b'^initialData:\s*')
regex_json_remove_end = re.compile(b',\s*environment$')
regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
# do search-request
@ -45,10 +45,9 @@ def request(query, params):
ui_language = params['language'].split('-')[0]
search_path = search_string.format(
query=urlencode({'query': query,
'uiLanguage': ui_language,
'region': region}),
page=params['pageno'])
query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}),
page=params['pageno']
)
# image search query is something like 'image?{query}&page={page}'
if params['category'] == 'images':
@ -63,14 +62,14 @@ def request(query, params):
def response(resp):
results = []
json_regex = regex_json.search(resp.content)
json_regex = regex_json.search(resp.text)
# check if results are returned
if not json_regex:
return []
json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group()))
json = loads(json_raw)
json_raw = regex_json_remove_end.sub(b'', regex_json_remove_start.sub(b'', json_regex.group()))
json = loads(json_raw.decode('utf-8'))
# parse results
for result in json['Results'].get('items', []):
@ -78,7 +77,7 @@ def response(resp):
# parse image results
if result.get('ContentType', '').startswith('image'):
img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
# append result
results.append({'url': result['SourceUrl'],
@ -100,7 +99,7 @@ def response(resp):
# parse images
for result in json.get('Images', []):
# decode image url
img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
# append result
results.append({'url': result['SourceUrl'],

View File

@ -11,11 +11,11 @@
"""
import re
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
from datetime import datetime
from searx.engines.nyaa import int_or_zero, get_filesize_mul
from searx.url_utils import urlencode
# engine dependent config
categories = ['files', 'videos', 'music']
@ -28,8 +28,7 @@ search_url = base_url + 'search.php?{query}'
# do search-request
def request(query, params):
query = urlencode({'page': params['pageno'],
'terms': query})
query = urlencode({'page': params['pageno'], 'terms': query})
params['url'] = search_url.format(query=query)
return params
@ -50,7 +49,7 @@ def response(resp):
size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
# processing the results, two rows at a time
for i in xrange(0, len(rows), 2):
for i in range(0, len(rows), 2):
# parse the first row
name_row = rows[i]
@ -79,14 +78,14 @@ def response(resp):
groups = size_re.match(item).groups()
multiplier = get_filesize_mul(groups[1])
params['filesize'] = int(multiplier * float(groups[0]))
except Exception as e:
except:
pass
elif item.startswith('Date:'):
try:
# Date: 2016-02-21 21:44 UTC
date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC')
params['publishedDate'] = date
except Exception as e:
except:
pass
elif item.startswith('Comment:'):
params['content'] = item

View File

@ -12,11 +12,11 @@
"""
import re
from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text
from datetime import datetime
from searx.engines.nyaa import int_or_zero, get_filesize_mul
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config
categories = ['files', 'videos', 'music']
@ -70,7 +70,7 @@ def response(resp):
size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
size, suffix = size_str.split()
params['filesize'] = int(size) * get_filesize_mul(suffix)
except Exception as e:
except:
pass
# does our link contain a valid SHA1 sum?
@ -84,7 +84,7 @@ def response(resp):
# Fri, 25 Mar 2016 16:29:01
date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
params['publishedDate'] = date
except Exception as e:
except:
pass
results.append(params)

View File

@ -9,8 +9,12 @@
@parse url, title, content
"""
import re
from sys import version_info
from searx.utils import is_valid_lang
if version_info[0] == 3:
unicode = str
categories = ['general']
url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'

View File

@ -12,11 +12,10 @@
@todo publishedDate
"""
from urlparse import urljoin
from urllib import urlencode
from lxml import html
from datetime import datetime
from searx.engines.xpath import extract_text
from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['social media']

View File

@ -13,8 +13,8 @@
# @todo set content-parameter with correct data
from json import loads
from urllib import urlencode
from dateutil import parser
from searx.url_utils import urlencode
# engine dependent config
categories = ['videos']

View File

@ -14,12 +14,11 @@
from searx import logger
from searx.poolrequests import get
from searx.engines.xpath import extract_text
from searx.utils import format_date_by_locale
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
from json import loads
from lxml.html import fromstring
from urllib import urlencode
logger = logger.getChild('wikidata')
result_count = 1
@ -62,14 +61,13 @@ def request(query, params):
language = 'en'
params['url'] = url_search.format(
query=urlencode({'label': query,
'language': language}))
query=urlencode({'label': query, 'language': language}))
return params
def response(resp):
results = []
html = fromstring(resp.content)
html = fromstring(resp.text)
wikidata_ids = html.xpath(wikidata_ids_xpath)
language = resp.search_params['language'].split('-')[0]
@ -78,10 +76,9 @@ def response(resp):
# TODO: make requests asynchronous to avoid timeout when result_count > 1
for wikidata_id in wikidata_ids[:result_count]:
url = url_detail.format(query=urlencode({'page': wikidata_id,
'uselang': language}))
url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
htmlresponse = get(url)
jsonresponse = loads(htmlresponse.content)
jsonresponse = loads(htmlresponse.text)
results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'])
return results

View File

@ -11,13 +11,12 @@
"""
from json import loads
from urllib import urlencode, quote
from lxml.html import fromstring
from searx.url_utils import quote, urlencode
# search-url
base_url = 'https://{language}.wikipedia.org/'
search_postfix = 'w/api.php?'\
base_url = u'https://{language}.wikipedia.org/'
search_url = base_url + u'w/api.php?'\
'action=query'\
'&format=json'\
'&{query}'\
@ -37,16 +36,16 @@ def url_lang(lang):
else:
language = lang
return base_url.format(language=language)
return language
# do search-request
def request(query, params):
if query.islower():
query += '|' + query.title()
query = u'{0}|{1}'.format(query.decode('utf-8'), query.decode('utf-8').title()).encode('utf-8')
params['url'] = url_lang(params['language']) \
+ search_postfix.format(query=urlencode({'titles': query}))
params['url'] = search_url.format(query=urlencode({'titles': query}),
language=url_lang(params['language']))
return params
@ -78,7 +77,7 @@ def extract_first_paragraph(content, title, image):
def response(resp):
results = []
search_result = loads(resp.content)
search_result = loads(resp.text)
# wikipedia article's unique id
# first valid id is assumed to be the requested article
@ -99,11 +98,9 @@ def response(resp):
extract = page.get('extract')
summary = extract_first_paragraph(extract, title, image)
if not summary:
return []
# link to wikipedia article
wikipedia_link = url_lang(resp.search_params['language']) \
wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \
+ 'wiki/' + quote(title.replace(' ', '_').encode('utf8'))
results.append({'url': wikipedia_link, 'title': title})

View File

@ -8,8 +8,8 @@
# @stable yes
# @parse url, infobox
from urllib import urlencode
from lxml import etree
from searx.url_utils import urlencode
# search-url
search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
@ -37,8 +37,7 @@ image_pods = {'VisualRepresentation',
# do search-request
def request(query, params):
params['url'] = search_url.format(query=urlencode({'input': query}),
api_key=api_key)
params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key)
params['headers']['Referer'] = site_url.format(query=urlencode({'i': query}))
return params
@ -56,7 +55,7 @@ def replace_pua_chars(text):
u'\uf74e': 'i', # imaginary number
u'\uf7d9': '='} # equals sign
for k, v in pua_chars.iteritems():
for k, v in pua_chars.items():
text = text.replace(k, v)
return text
@ -66,7 +65,7 @@ def replace_pua_chars(text):
def response(resp):
results = []
search_results = etree.XML(resp.content)
search_results = etree.XML(resp.text)
# return empty array if there are no results
if search_results.xpath(failure_xpath):
@ -120,10 +119,10 @@ def response(resp):
# append infobox
results.append({'infobox': infobox_title,
'attributes': result_chunks,
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
# append link to site
results.append({'url': resp.request.headers['Referer'].decode('utf8'),
results.append({'url': resp.request.headers['Referer'],
'title': title,
'content': result_content})

View File

@ -10,10 +10,9 @@
from json import loads
from time import time
from urllib import urlencode
from lxml.etree import XML
from searx.poolrequests import get as http_get
from searx.url_utils import urlencode
# search-url
url = 'https://www.wolframalpha.com/'
@ -62,7 +61,7 @@ obtain_token()
# do search-request
def request(query, params):
# obtain token if last update was more than an hour
if time() - token['last_updated'] > 3600:
if time() - (token['last_updated'] or 0) > 3600:
obtain_token()
params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
@ -112,9 +111,9 @@ def response(resp):
results.append({'infobox': infobox_title,
'attributes': result_chunks,
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
results.append({'url': resp.request.headers['Referer'].decode('utf8'),
results.append({'url': resp.request.headers['Referer'],
'title': 'Wolfram|Alpha (' + infobox_title + ')',
'content': result_content})

View File

@ -10,11 +10,9 @@
@parse url, title, thumbnail, img_src, content
"""
from urllib import urlencode
from urlparse import urljoin
from lxml import html
import string
import re
from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['images']
@ -55,7 +53,7 @@ def response(resp):
cur_element += result_part
# fix xml-error
cur_element = string.replace(cur_element, '"></a>', '"/></a>')
cur_element = cur_element.replace('"></a>', '"/></a>')
dom = html.fromstring(cur_element)
link = dom.xpath('//a')[0]

View File

@ -13,8 +13,7 @@
"""
from json import loads
from urllib import urlencode
from urlparse import urljoin
from searx.url_utils import urlencode, urljoin
# engine dependent config
categories = ['images']

View File

@ -1,13 +1,13 @@
from lxml import html
from urllib import urlencode, unquote
from urlparse import urlparse, urljoin
from lxml.etree import _ElementStringResult, _ElementUnicodeResult
from searx.utils import html_to_text
from searx.url_utils import unquote, urlencode, urljoin, urlparse
search_url = None
url_xpath = None
content_xpath = None
title_xpath = None
paging = False
suggestion_xpath = ''
results_xpath = ''

View File

@ -13,8 +13,8 @@
# @todo parse video, audio and file results
from json import loads
from urllib import urlencode
from dateutil import parser
from searx.url_utils import urlencode
from searx.utils import html_to_text

View File

@ -11,10 +11,9 @@
@parse url, title, content, suggestion
"""
from urllib import urlencode
from urlparse import unquote
from lxml import html
from searx.engines.xpath import extract_text, extract_url
from searx.url_utils import unquote, urlencode
# engine dependent config
categories = ['general']

View File

@ -9,13 +9,13 @@
# @stable no (HTML can change)
# @parse url, title, content, publishedDate
from urllib import urlencode
import re
from datetime import datetime, timedelta
from lxml import html
from searx.engines.xpath import extract_text, extract_url
from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
from datetime import datetime, timedelta
import re
from dateutil import parser
from searx.url_utils import urlencode
# engine dependent config
categories = ['news']

View File

@ -9,9 +9,9 @@
@parse url, title, content
"""
from urllib import urlencode
from lxml import html
from searx.search import logger
from searx import logger
from searx.url_utils import urlencode
logger = logger.getChild('yandex engine')

View File

@ -9,8 +9,8 @@
# @parse url, title, content, publishedDate, thumbnail, embedded
from json import loads
from urllib import urlencode
from dateutil import parser
from searx.url_utils import urlencode
# engine dependent config
categories = ['videos', 'music']

View File

@ -8,10 +8,10 @@
# @stable no
# @parse url, title, content, publishedDate, thumbnail, embedded
from urllib import quote_plus
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import list_get
from searx.url_utils import quote_plus
# engine dependent config
categories = ['videos', 'music']

View File

@ -14,9 +14,12 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
'''
from sys import exit
from sys import exit, version_info
from searx import logger
if version_info[0] == 3:
unicode = str
logger = logger.getChild('plugins')
from searx.plugins import (doai_rewrite,

View File

@ -1,6 +1,6 @@
from flask_babel import gettext
import re
from urlparse import urlparse, parse_qsl
from searx.url_utils import urlparse, parse_qsl
regex = re.compile(r'10\.\d{4,9}/[^\s]+')

View File

@ -16,14 +16,17 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
'''
import re
from urlparse import urlparse
import sys
from lxml import etree
from os import listdir, environ
from os.path import isfile, isdir, join
from searx.plugins import logger
from flask_babel import gettext
from searx import searx_dir
from searx.url_utils import urlparse
if sys.version_info[0] == 3:
unicode = str
name = "HTTPS rewrite"
description = gettext('Rewrite HTTP links to HTTPS if possible')

View File

@ -22,7 +22,7 @@ default_on = True
# Self User Agent regex
p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE)
# attach callback to the post search hook
@ -31,7 +31,7 @@ p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
def post_search(request, search):
if search.search_query.pageno > 1:
return True
if search.search_query.query == 'ip':
if search.search_query.query == b'ip':
x_forwarded_for = request.headers.getlist("X-Forwarded-For")
if x_forwarded_for:
ip = x_forwarded_for[0]

View File

@ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
from flask_babel import gettext
import re
from urlparse import urlunparse
from searx.url_utils import urlunparse
regexes = {re.compile(r'utm_[^&]+&?'),
re.compile(r'(wkey|wemail)[^&]+&?'),

View File

@ -23,7 +23,7 @@ class Setting(object):
def __init__(self, default_value, **kwargs):
super(Setting, self).__init__()
self.value = default_value
for key, value in kwargs.iteritems():
for key, value in kwargs.items():
setattr(self, key, value)
self._post_init()
@ -38,7 +38,7 @@ class Setting(object):
return self.value
def save(self, name, resp):
resp.set_cookie(name, bytes(self.value), max_age=COOKIE_MAX_AGE)
resp.set_cookie(name, self.value, max_age=COOKIE_MAX_AGE)
class StringSetting(Setting):
@ -133,7 +133,7 @@ class MapSetting(Setting):
def save(self, name, resp):
if hasattr(self, 'key'):
resp.set_cookie(name, bytes(self.key), max_age=COOKIE_MAX_AGE)
resp.set_cookie(name, self.key, max_age=COOKIE_MAX_AGE)
class SwitchableSetting(Setting):
@ -194,7 +194,7 @@ class EnginesSetting(SwitchableSetting):
def _post_init(self):
super(EnginesSetting, self)._post_init()
transformed_choices = []
for engine_name, engine in self.choices.iteritems():
for engine_name, engine in self.choices.items():
for category in engine.categories:
transformed_choice = dict()
transformed_choice['default_on'] = not engine.disabled
@ -241,9 +241,9 @@ class Preferences(object):
'language': SearchLanguageSetting(settings['search']['language'],
choices=LANGUAGE_CODES),
'locale': EnumStringSetting(settings['ui']['default_locale'],
choices=settings['locales'].keys() + ['']),
choices=list(settings['locales'].keys()) + ['']),
'autocomplete': EnumStringSetting(settings['search']['autocomplete'],
choices=autocomplete.backends.keys() + ['']),
choices=list(autocomplete.backends.keys()) + ['']),
'image_proxy': MapSetting(settings['server']['image_proxy'],
map={'': settings['server']['image_proxy'],
'0': False,
@ -260,7 +260,7 @@ class Preferences(object):
self.unknown_params = {}
def parse_cookies(self, input_data):
for user_setting_name, user_setting in input_data.iteritems():
for user_setting_name, user_setting in input_data.items():
if user_setting_name in self.key_value_settings:
self.key_value_settings[user_setting_name].parse(user_setting)
elif user_setting_name == 'disabled_engines':
@ -274,7 +274,7 @@ class Preferences(object):
disabled_engines = []
enabled_categories = []
disabled_plugins = []
for user_setting_name, user_setting in input_data.iteritems():
for user_setting_name, user_setting in input_data.items():
if user_setting_name in self.key_value_settings:
self.key_value_settings[user_setting_name].parse(user_setting)
elif user_setting_name.startswith('engine_'):
@ -295,7 +295,7 @@ class Preferences(object):
return self.key_value_settings[user_setting_name].get_value()
def save(self, resp):
for user_setting_name, user_setting in self.key_value_settings.iteritems():
for user_setting_name, user_setting in self.key_value_settings.items():
user_setting.save(user_setting_name, resp)
self.engines.save(resp)
self.plugins.save(resp)

View File

@ -21,8 +21,12 @@ from searx.languages import language_codes
from searx.engines import (
categories, engines, engine_shortcuts
)
import string
import re
import string
import sys
if sys.version_info[0] == 3:
unicode = str
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
@ -146,7 +150,7 @@ class SearchQuery(object):
"""container for all the search parameters (query, language, etc...)"""
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range):
self.query = query
self.query = query.encode('utf-8')
self.engines = engines
self.categories = categories
self.lang = lang

View File

@ -1,9 +1,13 @@
import re
import sys
from collections import defaultdict
from operator import itemgetter
from threading import RLock
from urlparse import urlparse, unquote
from searx.engines import engines
from searx.url_utils import urlparse, unquote
if sys.version_info[0] == 3:
basestring = str
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)

View File

@ -16,8 +16,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
'''
import gc
import sys
import threading
from thread import start_new_thread
from time import time
from uuid import uuid4
import requests.exceptions
@ -33,6 +33,14 @@ from searx import logger
from searx.plugins import plugins
from searx.exceptions import SearxParameterException
try:
from thread import start_new_thread
except:
from _thread import start_new_thread
if sys.version_info[0] == 3:
unicode = str
logger = logger.getChild('search')
number_of_searches = 0
@ -387,7 +395,7 @@ class Search(object):
request_params['time_range'] = search_query.time_range
# append request to list
requests.append((selected_engine['name'], search_query.query.encode('utf-8'), request_params))
requests.append((selected_engine['name'], search_query.query, request_params))
# update timeout_limit
timeout_limit = max(timeout_limit, engine.timeout)

View File

@ -17,7 +17,7 @@ server:
ui:
themes_path : ""
default_theme : legacy
default_theme : oscar
default_locale : ""
outgoing:

View File

@ -3,7 +3,7 @@
<div class="center">
<h1>{{ _('Page not found') }}</h1>
{% autoescape false %}
<p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
<p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %}
</div>
{% endblock %}

View File

@ -3,7 +3,7 @@
<div class="center">
<h1>{{ _('Page not found') }}</h1>
{% autoescape false %}
<p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
<p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %}
</div>
{% endblock %}

View File

@ -3,7 +3,7 @@
<div class="text-center">
<h1>{{ _('Page not found') }}</h1>
{% autoescape false %}
<p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
<p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %}
</div>
{% endblock %}

View File

@ -3,7 +3,7 @@
<div class="center">
<h1>{{ _('Page not found') }}</h1>
{% autoescape false %}
<p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
<p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
{% endautoescape %}
</div>
{% endblock %}

View File

@ -1,13 +1,16 @@
# -*- coding: utf-8 -*-
"""Shared testing code."""
from plone.testing import Layer
from unittest2 import TestCase
from os.path import dirname, join, abspath
import os
import subprocess
import traceback
from os.path import dirname, join, abspath
from splinter import Browser
from unittest2 import TestCase
class SearxTestLayer:
@ -32,7 +35,7 @@ class SearxTestLayer:
testTearDown = classmethod(testTearDown)
class SearxRobotLayer(Layer):
class SearxRobotLayer():
"""Searx Robot Test Layer"""
def setUp(self):
@ -62,7 +65,12 @@ class SearxRobotLayer(Layer):
del os.environ['SEARX_SETTINGS_PATH']
SEARXROBOTLAYER = SearxRobotLayer()
# SEARXROBOTLAYER = SearxRobotLayer()
def run_robot_tests(tests):
    """Run every browser test callable in *tests*, one after another.

    Each test is called with its own ``Browser`` instance, which is used as
    a context manager around the call.  Exceptions raised by a test are not
    caught here and propagate to the caller.
    """
    test_count = len(tests)
    print('Running {0} tests'.format(test_count))
    for robot_test in tests:
        # A new browser per test.
        with Browser() as browser:
            robot_test(browser)
class SearxTestCase(TestCase):
@ -72,17 +80,19 @@ class SearxTestCase(TestCase):
if __name__ == '__main__':
from tests.test_robot import test_suite
import sys
from zope.testrunner.runner import Runner
# test cases
from tests import robot
base_dir = abspath(join(dirname(__file__), '../tests'))
if sys.argv[1] == 'robot':
r = Runner(['--color',
'--auto-progress',
'--stop-on-error',
'--path',
base_dir],
found_suites=[test_suite()])
r.run()
sys.exit(int(r.failed))
test_layer = SearxRobotLayer()
errors = False
try:
test_layer.setUp()
run_robot_tests([getattr(robot, x) for x in dir(robot) if x.startswith('test_')])
except Exception:
errors = True
print('Error occured: {0}'.format(traceback.format_exc()))
test_layer.tearDown()
sys.exit(1 if errors else 0)

28
searx/url_utils.py Normal file
View File

@ -0,0 +1,28 @@
# Python 2/3 compatibility shim for URL helpers.
#
# The quoting/parsing functions live in ``urllib``/``urlparse`` on Python 2
# and in ``urllib.parse`` on Python 3.  Importing them from this module gives
# callers a single import location that works on both.
from sys import version_info

if version_info[0] == 2:
    from urllib import quote, quote_plus, unquote, urlencode
    from urlparse import parse_qsl, urljoin, urlparse, urlunparse, ParseResult
else:
    from urllib.parse import (
        parse_qsl,
        quote,
        quote_plus,
        unquote,
        urlencode,
        urljoin,
        urlparse,
        urlunparse,
        ParseResult
    )

# Declare the re-exported names the standard way: ``__all__`` is what
# ``from searx.url_utils import *`` and linters actually consult.
__all__ = [
    'parse_qsl',
    'quote',
    'quote_plus',
    'unquote',
    'urlencode',
    'urljoin',
    'urlparse',
    'urlunparse',
    'ParseResult',
]

# Legacy tuple of the re-exported objects.  Python itself ignores this name;
# it is kept so any existing reference to ``__export__`` keeps working.
__export__ = (parse_qsl,
              quote,
              quote_plus,
              unquote,
              urlencode,
              urljoin,
              urlparse,
              urlunparse,
              ParseResult)

View File

@ -1,11 +1,9 @@
import cStringIO
import csv
import os
import re
from babel.dates import format_date
from codecs import getincrementalencoder
from HTMLParser import HTMLParser
from imp import load_source
from os.path import splitext, join
from random import choice
@ -16,6 +14,19 @@ from searx.languages import language_codes
from searx import settings
from searx import logger
try:
from cStringIO import StringIO
except:
from io import StringIO
try:
from HTMLParser import HTMLParser
except:
from html.parser import HTMLParser
if sys.version_info[0] == 3:
unichr = chr
unicode = str
logger = logger.getChild('utils')
@ -140,7 +151,7 @@ class UnicodeWriter:
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
self.queue = cStringIO.StringIO()
self.queue = StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = getincrementalencoder(encoding)()
@ -152,14 +163,13 @@ class UnicodeWriter:
unicode_row.append(col.encode('utf-8').strip())
else:
unicode_row.append(col)
self.writer.writerow(unicode_row)
self.writer.writerow([x.decode('utf-8') if hasattr(x, 'decode') else x for x in unicode_row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
data = self.queue.getvalue().strip('\x00')
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
self.stream.write(data.decode('utf-8'))
# empty queue
self.queue.truncate(0)
@ -231,7 +241,7 @@ def dict_subset(d, properties):
def prettify_url(url, max_length=74):
if len(url) > max_length:
chunk_len = max_length / 2 + 1
chunk_len = int(max_length / 2 + 1)
return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
else:
return url

View File

@ -22,11 +22,12 @@ if __name__ == '__main__':
from os.path import realpath, dirname
path.append(realpath(dirname(realpath(__file__)) + '/../'))
import cStringIO
import hashlib
import hmac
import json
import os
import sys
import requests
from searx import logger
@ -42,8 +43,6 @@ except:
exit(1)
from cgi import escape
from datetime import datetime, timedelta
from urllib import urlencode
from urlparse import urlparse, urljoin
from werkzeug.contrib.fixers import ProxyFix
from flask import (
Flask, request, render_template, url_for, Response, make_response,
@ -52,7 +51,7 @@ from flask import (
from flask_babel import Babel, gettext, format_date, format_decimal
from flask.json import jsonify
from searx import settings, searx_dir, searx_debug
from searx.exceptions import SearxException, SearxParameterException
from searx.exceptions import SearxParameterException
from searx.engines import (
categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
)
@ -69,6 +68,7 @@ from searx.autocomplete import searx_bang, backends as autocomplete_backends
from searx.plugins import plugins
from searx.preferences import Preferences, ValidationException
from searx.answerers import answerers
from searx.url_utils import urlencode, urlparse, urljoin
# check if the pyopenssl package is installed.
# It is needed for SSL connection without trouble, see #298
@ -78,6 +78,15 @@ except ImportError:
logger.critical("The pyopenssl package has to be installed.\n"
"Some HTTPS connections will fail")
try:
from cStringIO import StringIO
except:
from io import StringIO
if sys.version_info[0] == 3:
unicode = str
# serve pages with HTTP/1.1
from werkzeug.serving import WSGIRequestHandler
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
@ -357,6 +366,8 @@ def render(template_name, override_theme=None, **kwargs):
kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')
kwargs['unicode'] = unicode
kwargs['scripts'] = set()
for plugin in request.user_plugins:
for script in plugin.js_dependencies:
@ -375,7 +386,7 @@ def render(template_name, override_theme=None, **kwargs):
def pre_request():
request.errors = []
preferences = Preferences(themes, categories.keys(), engines, plugins)
preferences = Preferences(themes, list(categories.keys()), engines, plugins)
request.preferences = preferences
try:
preferences.parse_cookies(request.cookies)
@ -479,10 +490,8 @@ def index():
for result in results:
if output_format == 'html':
if 'content' in result and result['content']:
result['content'] = highlight_content(escape(result['content'][:1024]),
search_query.query.encode('utf-8'))
result['title'] = highlight_content(escape(result['title'] or u''),
search_query.query.encode('utf-8'))
result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
else:
if result.get('content'):
result['content'] = html_to_text(result['content']).strip()
@ -510,7 +519,7 @@ def index():
result['publishedDate'] = format_date(result['publishedDate'])
if output_format == 'json':
return Response(json.dumps({'query': search_query.query,
return Response(json.dumps({'query': search_query.query.decode('utf-8'),
'number_of_results': number_of_results,
'results': results,
'answers': list(result_container.answers),
@ -519,7 +528,7 @@ def index():
'suggestions': list(result_container.suggestions)}),
mimetype='application/json')
elif output_format == 'csv':
csv = UnicodeWriter(cStringIO.StringIO())
csv = UnicodeWriter(StringIO())
keys = ('title', 'url', 'content', 'host', 'engine', 'score')
csv.writerow(keys)
for row in results:
@ -527,7 +536,7 @@ def index():
csv.writerow([row.get(key, '') for key in keys])
csv.stream.seek(0)
response = Response(csv.stream.read(), mimetype='application/csv')
cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.encode('utf-8'))
cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
response.headers.add('Content-Disposition', cont_disp)
return response
elif output_format == 'rss':
@ -578,7 +587,7 @@ def autocompleter():
disabled_engines = request.preferences.engines.get_disabled()
# parse query
raw_text_query = RawTextQuery(request.form.get('q', '').encode('utf-8'), disabled_engines)
raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines)
raw_text_query.parse_query()
# check if search query is set
@ -820,6 +829,7 @@ def page_not_found(e):
def run():
logger.debug('starting webserver on %s:%s', settings['server']['port'], settings['server']['bind_address'])
app.run(
debug=searx_debug,
use_debugger=searx_debug,

View File

@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
from time import sleep
url = "http://localhost:11111/"
def test_index(browser):
    """The start page loads and contains the text 'about'."""
    # Open the start page
    browser.visit(url)
    about_visible = browser.is_text_present('about')
    assert about_visible
def test_404(browser):
    """Visiting the 'missing_link' path shows the 'Page not found' text."""
    # Open the 'missing_link' path below the base URL
    missing_page_url = url + 'missing_link'
    browser.visit(missing_page_url)
    not_found_visible = browser.is_text_present('Page not found')
    assert not_found_visible
def test_about(browser):
    """Following the 'about' link leads to a page containing 'Why use searx?'."""
    browser.visit(url)
    browser.click_link_by_text('about')
    about_page_shown = browser.is_text_present('Why use searx?')
    assert about_page_shown
def test_preferences(browser):
    """The preferences page shows its headings and the dummy category checkbox label."""
    browser.visit(url)
    browser.click_link_by_text('preferences')

    # Texts that must appear on the page, checked in the original order.
    has_title = browser.is_text_present('Preferences')
    assert has_title
    has_cookies = browser.is_text_present('Cookies')
    assert has_cookies

    dummy_label_xpath = '//label[@for="checkbox_dummy"]'
    assert browser.is_element_present_by_xpath(dummy_label_xpath)
def test_preferences_engine_select(browser):
    """Check the dummy engine in the preferences and verify it is still checked after saving."""
    engine_tab = '//a[@href="#tab_engine"]'
    engine_input = '//input[@id="engine_general_dummy__general"]'
    engine_label = '//label[@for="engine_general_dummy__general"]'

    browser.visit(url)
    browser.click_link_by_text('preferences')

    # Open the engines tab.
    assert browser.is_element_present_by_xpath(engine_tab)
    browser.find_by_xpath(engine_tab).first.click()

    # The checkbox must be unchecked before it is toggled through its label.
    assert not browser.find_by_xpath(engine_input).first.checked
    browser.find_by_xpath(engine_label).first.check()
    browser.find_by_xpath('//input[@value="save"]').first.click()

    # waiting for the redirect - without this the test is flaky
    sleep(1)

    # Reload the preferences and confirm the checkbox is now checked.
    browser.visit(url)
    browser.click_link_by_text('preferences')
    browser.find_by_xpath(engine_tab).first.click()
    assert browser.find_by_xpath(engine_input).first.checked
def test_preferences_locale(browser):
    """Switch the UI locale to 'hu' and check that the interface is translated.

    Selects 'hu' in the ``locale`` field of the preferences page, saves, then
    follows the (now Hungarian) preferences link and verifies the translated
    heading text is shown.
    """
    browser.visit(url)
    browser.click_link_by_text('preferences')

    browser.select('locale', 'hu')
    browser.find_by_xpath('//input[@value="save"]').first.click()

    # waiting for the redirect - without this the test is flaky..
    sleep(1)

    browser.visit(url)
    # The preferences link is looked up by its translated text.
    browser.click_link_by_text('beállítások')
    # The result of is_text_present() was previously discarded, so this
    # check could never fail; assert it so a missing translation is caught.
    assert browser.is_text_present('Beállítások')
def test_search(browser):
    """Submitting a query through the search form shows the 'no results' message."""
    browser.visit(url)

    # Fill in the query field and submit via the form's submit button.
    browser.fill('q', 'test search query')
    submit_button = browser.find_by_xpath('//button[@type="submit"]').first
    submit_button.click()

    no_results_shown = browser.is_text_present('didn\'t find any results')
    assert no_results_shown

View File

@ -1,153 +0,0 @@
*** Settings ***
Library Selenium2Library timeout=10 implicit_wait=0.5
Test Setup Open Browser http://localhost:11111/
Test Teardown Close All Browsers
*** Keywords ***
Submit Preferences
Set Selenium Speed 2 seconds
Submit Form id=search_form
Location Should Be http://localhost:11111/
Set Selenium Speed 0 seconds
*** Test Cases ***
Front page
Page Should Contain about
Page Should Contain preferences
404 page
Go To http://localhost:11111/no-such-page
Page Should Contain Page not found
Page Should Contain Go to search page
About page
Click Element link=about
Page Should Contain Why use searx?
Page Should Contain Element link=search engines
Preferences page
Click Element link=preferences
Page Should Contain Preferences
Page Should Contain Default categories
Page Should Contain Currently used search engines
Page Should Contain dummy dummy
Page Should Contain general dummy
Switch category
Go To http://localhost:11111/preferences
Page Should Contain Checkbox category_general
Page Should Contain Checkbox category_dummy
Click Element xpath=//*[.="general"]
Click Element xpath=//*[.="dummy"]
Submit Preferences
Checkbox Should Not Be Selected category_general
Checkbox Should Be Selected category_dummy
Change language
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
Select From List locale hu
Submit Preferences
Page Should Contain rólunk
Page Should Contain beállítások
Change method
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
Select From List method GET
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be method GET
Select From List method POST
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be method POST
Change theme
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
List Selection Should Be theme legacy
Select From List theme oscar
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be theme oscar
Change safesearch
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
List Selection Should Be safesearch None
Select From List safesearch Strict
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be safesearch Strict
Change image proxy
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
List Selection Should Be image_proxy Disabled
Select From List image_proxy Enabled
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be image_proxy Enabled
Change search language
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
List Selection Should Be language Default language
Select From List language Türkçe - tr-TR
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be language Türkçe - tr-TR
Change autocomplete
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
List Selection Should Be autocomplete -
Select From List autocomplete google
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be autocomplete google
Change allowed/disabled engines
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
Page Should Contain Engine name
Element Should Contain xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy'] Block
Element Should Contain xpath=//label[@class="deny"][@for='engine_general_general_dummy'] Block
Click Element xpath=//label[@class="deny"][@for='engine_general_general_dummy']
Submit Preferences
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
Page Should Contain Engine name
Element Should Contain xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy'] Block
Element Should Contain xpath=//label[@class="deny"][@for='engine_general_general_dummy'] \
Block a plugin
Page Should Contain about
Page Should Contain preferences
Go To http://localhost:11111/preferences
List Selection Should Be theme legacy
Select From List theme oscar
Submit Preferences
Go To http://localhost:11111/preferences
List Selection Should Be theme oscar
Page Should Contain Plugins
Click Link Plugins
Checkbox Should Not Be Selected id=plugin_HTTPS_rewrite
Click Element xpath=//label[@for='plugin_HTTPS_rewrite']
Submit Preferences
Go To http://localhost:11111/preferences
Page Should Contain Plugins
Click Link Plugins
Checkbox Should Be Selected id=plugin_HTTPS_rewrite

View File

@ -25,7 +25,7 @@ class TestArchLinuxEngine(SearxTestCase):
self.assertTrue(query in params['url'])
self.assertTrue('wiki.archlinux.org' in params['url'])
for lang, domain in domains.iteritems():
for lang, domain in domains.items():
dic['language'] = lang
params = archlinux.request(query, dic)
self.assertTrue(domain in params['url'])
@ -102,5 +102,5 @@ class TestArchLinuxEngine(SearxTestCase):
for exp in expected:
res = results[i]
i += 1
for key, value in exp.iteritems():
for key, value in exp.items():
self.assertEqual(res[key], value)

View File

@ -7,18 +7,18 @@ from searx.testing import SearxTestCase
class TestBingEngine(SearxTestCase):
def test_request(self):
query = 'test_query'
query = u'test_query'
dicto = defaultdict(dict)
dicto['pageno'] = 0
dicto['language'] = 'fr_FR'
params = bing.request(query, dicto)
params = bing.request(query.encode('utf-8'), dicto)
self.assertTrue('url' in params)
self.assertTrue(query in params['url'])
self.assertTrue('language%3AFR' in params['url'])
self.assertTrue('bing.com' in params['url'])
dicto['language'] = 'all'
params = bing.request(query, dicto)
params = bing.request(query.encode('utf-8'), dicto)
self.assertTrue('language' in params['url'])
def test_response(self):

View File

@ -36,10 +36,10 @@ class TestBingNewsEngine(SearxTestCase):
self.assertRaises(AttributeError, bing_news.response, '')
self.assertRaises(AttributeError, bing_news.response, '[]')
response = mock.Mock(content='<html></html>')
response = mock.Mock(text='<html></html>')
self.assertEqual(bing_news.response(response), [])
response = mock.Mock(content='<html></html>')
response = mock.Mock(text='<html></html>')
self.assertEqual(bing_news.response(response), [])
html = """<?xml version="1.0" encoding="utf-8" ?>
@ -74,7 +74,7 @@ class TestBingNewsEngine(SearxTestCase):
</item>
</channel>
</rss>""" # noqa
response = mock.Mock(content=html)
response = mock.Mock(text=html.encode('utf-8'))
results = bing_news.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 2)
@ -113,7 +113,7 @@ class TestBingNewsEngine(SearxTestCase):
</item>
</channel>
</rss>""" # noqa
response = mock.Mock(content=html)
response = mock.Mock(text=html.encode('utf-8'))
results = bing_news.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
@ -136,11 +136,11 @@ class TestBingNewsEngine(SearxTestCase):
</channel>
</rss>""" # noqa
response = mock.Mock(content=html)
response = mock.Mock(text=html.encode('utf-8'))
results = bing_news.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
html = """<?xml version="1.0" encoding="utf-8" ?>gabarge"""
response = mock.Mock(content=html)
response = mock.Mock(text=html.encode('utf-8'))
self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response)

View File

@ -22,10 +22,10 @@ class TestBtdiggEngine(SearxTestCase):
self.assertRaises(AttributeError, btdigg.response, '')
self.assertRaises(AttributeError, btdigg.response, '[]')
response = mock.Mock(content='<html></html>')
response = mock.Mock(text='<html></html>')
self.assertEqual(btdigg.response(response), [])
html = """
html = u"""
<div id="search_res">
<table>
<tr>
@ -82,7 +82,7 @@ class TestBtdiggEngine(SearxTestCase):
</table>
</div>
"""
response = mock.Mock(content=html)
response = mock.Mock(text=html.encode('utf-8'))
results = btdigg.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 1)
@ -101,12 +101,12 @@ class TestBtdiggEngine(SearxTestCase):
</table>
</div>
"""
response = mock.Mock(content=html)
response = mock.Mock(text=html.encode('utf-8'))
results = btdigg.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 0)
html = """
html = u"""
<div id="search_res">
<table>
<tr>
@ -367,7 +367,7 @@ class TestBtdiggEngine(SearxTestCase):
</table>
</div>
"""
response = mock.Mock(content=html)
response = mock.Mock(text=html.encode('utf-8'))
results = btdigg.response(response)
self.assertEqual(type(results), list)
self.assertEqual(len(results), 5)

Some files were not shown because too many files have changed in this diff Show More