
Drop Python 2 (1/n): remove unicode string and url_utils

Authored by Dalf on 2020-08-06 17:42:46 +02:00, committed by Alexandre Flament
parent 272158944b
commit 1022228d95
112 changed files with 388 additions and 535 deletions
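
Almost every file below follows the same mechanical Python-3-only rewrite: the searx.url_utils compatibility shim is replaced by direct imports from the standard urllib.parse module, the `unicode = str` aliasing blocks and u'' string prefixes are dropped, try/except import fallbacks (HTMLParser, cStringIO, cgi.escape) give way to their Python 3 locations, and explicit 'utf-8' arguments to encode()/decode() disappear because UTF-8 is the default codec in Python 3. A minimal sketch of the pattern as applied to a typical engine request() function (the URL and variable names are illustrative, not taken from any one file in this diff):

# Before (Python 2 compatible):
#   from searx.url_utils import urlencode
#   if version_info[0] == 3:
#       unicode = str
#   query_str = query.decode('utf-8')
#   answer = unicode(value)

# After (Python 3 only):
from urllib.parse import urlencode

def request(query, params):
    # query arrives as bytes from the search plumbing; decode() defaults to UTF-8
    query_str = query.decode()
    # plain str() now does what the unicode() alias used to do
    params['url'] = 'https://example.org/search?' + urlencode({'q': query_str})
    return params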

View File

@ -213,10 +213,6 @@ gecko.driver:
PHONY += test test.sh test.pylint test.pep8 test.unit test.coverage test.robot PHONY += test test.sh test.pylint test.pep8 test.unit test.coverage test.robot
test: buildenv test.pylint test.pep8 test.unit gecko.driver test.robot test: buildenv test.pylint test.pep8 test.unit gecko.driver test.robot
ifeq ($(PY),2)
test.pylint:
@echo "LINT skip liniting py2"
else
# TODO: balance linting with pylint # TODO: balance linting with pylint
test.pylint: pyenvinstall test.pylint: pyenvinstall
@ -225,7 +221,6 @@ test.pylint: pyenvinstall
searx/testing.py \ searx/testing.py \
searx/engines/gigablast.py \ searx/engines/gigablast.py \
) )
endif
# ignored rules: # ignored rules:
# E402 module level import not at top of file # E402 module level import not at top of file

View File

@ -39,7 +39,7 @@ install_geckodriver() {
return return
fi fi
GECKODRIVER_VERSION="v0.24.0" GECKODRIVER_VERSION="v0.24.0"
PLATFORM="`python -c "import six; import platform; six.print_(platform.system().lower(), platform.architecture()[0])"`" PLATFORM="`python3 -c "import platform; print(platform.system().lower(), platform.architecture()[0])"`"
case "$PLATFORM" in case "$PLATFORM" in
"linux 32bit" | "linux2 32bit") ARCH="linux32";; "linux 32bit" | "linux2 32bit") ARCH="linux32";;
"linux 64bit" | "linux2 64bit") ARCH="linux64";; "linux 64bit" | "linux2 64bit") ARCH="linux64";;
@ -136,7 +136,7 @@ docker_build() {
# Check consistency between the git tag and the searx/version.py file # Check consistency between the git tag and the searx/version.py file
# /!\ HACK : parse Python file with bash /!\ # /!\ HACK : parse Python file with bash /!\
# otherwise it is not possible build the docker image without all Python dependencies ( version.py loads __init__.py ) # otherwise it is not possible build the docker image without all Python dependencies ( version.py loads __init__.py )
# SEARX_PYTHON_VERSION=$(python -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)") # SEARX_PYTHON_VERSION=$(python3 -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)")
SEARX_PYTHON_VERSION=$(cat searx/version.py | grep "\(VERSION_MAJOR\|VERSION_MINOR\|VERSION_BUILD\) =" | cut -d\= -f2 | sed -e 's/^[[:space:]]*//' | paste -sd "." -) SEARX_PYTHON_VERSION=$(cat searx/version.py | grep "\(VERSION_MAJOR\|VERSION_MINOR\|VERSION_BUILD\) =" | cut -d\= -f2 | sed -e 's/^[[:space:]]*//' | paste -sd "." -)
if [ $(echo "$SEARX_GIT_VERSION" | cut -d- -f1) != "$SEARX_PYTHON_VERSION" ]; then if [ $(echo "$SEARX_GIT_VERSION" | cut -d- -f1) != "$SEARX_PYTHON_VERSION" ]; then
echo "Inconsistency between the last git tag and the searx/version.py file" echo "Inconsistency between the last git tag and the searx/version.py file"

View File

@ -21,12 +21,8 @@ from os import environ
from os.path import realpath, dirname, join, abspath, isfile from os.path import realpath, dirname, join, abspath, isfile
from io import open from io import open
from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION
try: from yaml import safe_load
from yaml import safe_load
except:
from sys import exit, stderr
stderr.write('[E] install pyyaml\n')
exit(2)
searx_dir = abspath(dirname(__file__)) searx_dir = abspath(dirname(__file__))
engine_dir = dirname(realpath(__file__)) engine_dir = dirname(realpath(__file__))

View File

@ -1,12 +1,8 @@
from os import listdir from os import listdir
from os.path import realpath, dirname, join, isdir from os.path import realpath, dirname, join, isdir
from sys import version_info
from searx.utils import load_module from searx.utils import load_module
from collections import defaultdict from collections import defaultdict
if version_info[0] == 3:
unicode = str
answerers_dir = dirname(realpath(__file__)) answerers_dir = dirname(realpath(__file__))
@ -36,10 +32,10 @@ def ask(query):
results = [] results = []
query_parts = list(filter(None, query.query.split())) query_parts = list(filter(None, query.query.split()))
if query_parts[0].decode('utf-8') not in answerers_by_keywords: if query_parts[0].decode() not in answerers_by_keywords:
return results return results
for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]: for answerer in answerers_by_keywords[query_parts[0].decode()]:
result = answerer(query) result = answerer(query)
if result: if result:
results.append(result) results.append(result)

View File

@ -1,7 +1,6 @@
import hashlib import hashlib
import random import random
import string import string
import sys
import uuid import uuid
from flask_babel import gettext from flask_babel import gettext
@ -10,12 +9,7 @@ from flask_babel import gettext
keywords = ('random',) keywords = ('random',)
random_int_max = 2**31 random_int_max = 2**31
random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
if sys.version_info[0] == 2:
random_string_letters = string.lowercase + string.digits + string.uppercase
else:
unicode = str
random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
def random_characters(): def random_characters():
@ -24,25 +18,25 @@ def random_characters():
def random_string(): def random_string():
return u''.join(random_characters()) return ''.join(random_characters())
def random_float(): def random_float():
return unicode(random.random()) return str(random.random())
def random_int(): def random_int():
return unicode(random.randint(-random_int_max, random_int_max)) return str(random.randint(-random_int_max, random_int_max))
def random_sha256(): def random_sha256():
m = hashlib.sha256() m = hashlib.sha256()
m.update(''.join(random_characters()).encode()) m.update(''.join(random_characters()).encode())
return unicode(m.hexdigest()) return str(m.hexdigest())
def random_uuid(): def random_uuid():
return unicode(uuid.uuid4()) return str(uuid.uuid4())
random_types = {b'string': random_string, random_types = {b'string': random_string,
@ -70,4 +64,4 @@ def answer(query):
def self_info(): def self_info():
return {'name': gettext('Random value generator'), return {'name': gettext('Random value generator'),
'description': gettext('Generate different random values'), 'description': gettext('Generate different random values'),
'examples': [u'random {}'.format(x.decode('utf-8')) for x in random_types]} 'examples': ['random {}'.format(x.decode()) for x in random_types]}

View File

@ -1,11 +1,8 @@
from sys import version_info
from functools import reduce from functools import reduce
from operator import mul from operator import mul
from flask_babel import gettext from flask_babel import gettext
if version_info[0] == 3:
unicode = str
keywords = ('min', keywords = ('min',
'max', 'max',
@ -44,7 +41,7 @@ def answer(query):
if answer is None: if answer is None:
return [] return []
return [{'answer': unicode(answer)}] return [{'answer': str(answer)}]
# required answerer function # required answerer function

View File

@ -16,19 +16,16 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
''' '''
import sys
from lxml import etree from lxml import etree
from json import loads from json import loads
from urllib.parse import urlencode
from searx import settings from searx import settings
from searx.languages import language_codes from searx.languages import language_codes
from searx.engines import ( from searx.engines import (
categories, engines, engine_shortcuts categories, engines, engine_shortcuts
) )
from searx.poolrequests import get as http_get from searx.poolrequests import get as http_get
from searx.url_utils import urlencode
if sys.version_info[0] == 3:
unicode = str
def get(*args, **kwargs): def get(*args, **kwargs):
@ -85,22 +82,22 @@ def searx_bang(full_query):
engine_query = full_query.getSearchQuery()[1:] engine_query = full_query.getSearchQuery()[1:]
for lc in language_codes: for lc in language_codes:
lang_id, lang_name, country, english_name = map(unicode.lower, lc) lang_id, lang_name, country, english_name = map(str.lower, lc)
# check if query starts with language-id # check if query starts with language-id
if lang_id.startswith(engine_query): if lang_id.startswith(engine_query):
if len(engine_query) <= 2: if len(engine_query) <= 2:
results.append(u':{lang_id}'.format(lang_id=lang_id.split('-')[0])) results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
else: else:
results.append(u':{lang_id}'.format(lang_id=lang_id)) results.append(':{lang_id}'.format(lang_id=lang_id))
# check if query starts with language name # check if query starts with language name
if lang_name.startswith(engine_query) or english_name.startswith(engine_query): if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
results.append(u':{lang_name}'.format(lang_name=lang_name)) results.append(':{lang_name}'.format(lang_name=lang_name))
# check if query starts with country # check if query starts with country
if country.startswith(engine_query.replace('_', ' ')): if country.startswith(engine_query.replace('_', ' ')):
results.append(u':{country}'.format(country=country.replace(' ', '_'))) results.append(':{country}'.format(country=country.replace(' ', '_')))
# remove duplicates # remove duplicates
result_set = set(results) result_set = set(results)

View File

@ -1,7 +1,8 @@
from urllib.parse import quote, urljoin
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size from searx.utils import get_torrent_size
from searx.url_utils import quote, urljoin
url = 'https://1337x.to/' url = 'https://1337x.to/'
search_url = url + 'search/{search_term}/{pageno}/' search_url = url + 'search/{search_term}/{pageno}/'

View File

@ -9,9 +9,9 @@
@parse url, title, content, seed, leech, torrentfile @parse url, title, content, seed, leech, torrentfile
""" """
from urllib.parse import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
from searx.utils import get_torrent_size, int_or_zero from searx.utils import get_torrent_size, int_or_zero
# engine dependent config # engine dependent config
@ -63,7 +63,7 @@ def response(resp):
except: except:
pass pass
# I didn't add download/seed/leech count since as I figured out they are generated randomly everytime # I didn't add download/seed/leech count since as I figured out they are generated randomly everytime
content = u'Category: "{category}".' content = 'Category: "{category}".'
content = content.format(category=category) content = content.format(category=category)
results.append({'url': href, results.append({'url': href,

View File

@ -9,9 +9,10 @@
@parse url, title, thumbnail_src @parse url, title, thumbnail_src
""" """
from urllib.parse import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['it'] categories = ['it']

View File

@ -11,9 +11,9 @@
@parse url, title @parse url, title
""" """
from urllib.parse import urlencode, urljoin
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode, urljoin
# engine dependent config # engine dependent config
categories = ['it'] categories = ['it']

View File

@ -11,9 +11,9 @@
More info on api: https://arxiv.org/help/api/user-manual More info on api: https://arxiv.org/help/api/user-manual
""" """
from urllib.parse import urlencode
from lxml import html from lxml import html
from datetime import datetime from datetime import datetime
from searx.url_utils import urlencode
categories = ['science'] categories = ['science']
@ -30,7 +30,7 @@ def request(query, params):
# basic search # basic search
offset = (params['pageno'] - 1) * number_of_results offset = (params['pageno'] - 1) * number_of_results
string_args = dict(query=query.decode('utf-8'), string_args = dict(query=query.decode(),
offset=offset, offset=offset,
number_of_results=number_of_results) number_of_results=number_of_results)

View File

@ -13,10 +13,10 @@
More info on api: http://base-search.net/about/download/base_interface.pdf More info on api: http://base-search.net/about/download/base_interface.pdf
""" """
from urllib.parse import urlencode
from lxml import etree from lxml import etree
from datetime import datetime from datetime import datetime
import re import re
from searx.url_utils import urlencode
from searx.utils import searx_useragent from searx.utils import searx_useragent

View File

@ -14,10 +14,10 @@
""" """
import re import re
from urllib.parse import urlencode
from lxml import html from lxml import html
from searx import logger, utils from searx import logger, utils
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
from searx.utils import match_language, gen_useragent, eval_xpath from searx.utils import match_language, gen_useragent, eval_xpath
logger = logger.getChild('bing engine') logger = logger.getChild('bing engine')
@ -47,7 +47,7 @@ def request(query, params):
else: else:
lang = match_language(params['language'], supported_languages, language_aliases) lang = match_language(params['language'], supported_languages, language_aliases)
query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8') query = 'language:{} {}'.format(lang.split('-')[0].upper(), query.decode()).encode()
search_path = search_string.format( search_path = search_string.format(
query=urlencode({'q': query}), query=urlencode({'q': query}),

View File

@ -12,10 +12,10 @@
""" """
from urllib.parse import urlencode
from lxml import html from lxml import html
from json import loads from json import loads
import re import re
from searx.url_utils import urlencode
from searx.utils import match_language from searx.utils import match_language
from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
@ -91,7 +91,7 @@ def response(resp):
# strip 'Unicode private use area' highlighting, they render to Tux # strip 'Unicode private use area' highlighting, they render to Tux
# the Linux penguin and a standing diamond on my machine... # the Linux penguin and a standing diamond on my machine...
title = m.get('t', '').replace(u'\ue000', '').replace(u'\ue001', '') title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
results.append({'template': 'images.html', results.append({'template': 'images.html',
'url': m['purl'], 'url': m['purl'],
'thumbnail_src': m['turl'], 'thumbnail_src': m['turl'],

View File

@ -13,10 +13,9 @@
from datetime import datetime from datetime import datetime
from dateutil import parser from dateutil import parser
from urllib.parse import urlencode, urlparse, parse_qsl
from lxml import etree from lxml import etree
from searx.utils import list_get, match_language from searx.utils import list_get, match_language
from searx.url_utils import urlencode, urlparse, parse_qsl
from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
# engine dependent config # engine dependent config

View File

@ -12,7 +12,7 @@
from json import loads from json import loads
from lxml import html from lxml import html
from searx.url_utils import urlencode from urllib.parse import urlencode
from searx.utils import match_language from searx.utils import match_language
from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases

View File

@ -12,8 +12,8 @@
from lxml import html from lxml import html
from operator import itemgetter from operator import itemgetter
from urllib.parse import quote, urljoin
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import quote, urljoin
from searx.utils import get_torrent_size from searx.utils import get_torrent_size
# engine dependent config # engine dependent config

View File

@ -1,14 +1,11 @@
import json import json
import re import re
import os import os
import sys
import unicodedata import unicodedata
from io import open from io import open
from datetime import datetime from datetime import datetime
if sys.version_info[0] == 3:
unicode = str
categories = [] categories = []
url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}' url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
@ -20,7 +17,7 @@ db = 1
def normalize_name(name): def normalize_name(name):
name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s') name = name.decode().lower().replace('-', ' ').rstrip('s')
name = re.sub(' +', ' ', name) name = re.sub(' +', ' ', name)
return unicodedata.normalize('NFKD', name).lower() return unicodedata.normalize('NFKD', name).lower()

View File

@ -14,7 +14,7 @@
from json import loads from json import loads
from datetime import datetime from datetime import datetime
from searx.url_utils import urlencode from urllib.parse import urlencode
from searx.utils import match_language, html_to_text from searx.utils import match_language, html_to_text
# engine dependent config # engine dependent config

View File

@ -11,7 +11,7 @@
""" """
from json import loads from json import loads
from searx.url_utils import urlencode from urllib.parse import urlencode
# engine dependent config # engine dependent config
categories = ['music'] categories = ['music']
@ -50,7 +50,7 @@ def response(resp):
if url.startswith('http://'): if url.startswith('http://'):
url = 'https' + url[4:] url = 'https' + url[4:]
content = u'{} - {} - {}'.format( content = '{} - {} - {}'.format(
result['artist']['name'], result['artist']['name'],
result['album']['title'], result['album']['title'],
result['title']) result['title'])

View File

@ -14,8 +14,9 @@
from lxml import html from lxml import html
import re import re
from urllib.parse import urlencode
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['images'] categories = ['images']

View File

@ -10,12 +10,12 @@
""" """
import re import re
from urllib.parse import urljoin
from lxml import html from lxml import html
from searx.utils import is_valid_lang, eval_xpath from searx.utils import is_valid_lang, eval_xpath
from searx.url_utils import urljoin
categories = ['general'] categories = ['general']
url = u'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100 weight = 100
parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
@ -37,7 +37,7 @@ def request(query, params):
params['url'] = url.format(from_lang=from_lang[2], params['url'] = url.format(from_lang=from_lang[2],
to_lang=to_lang[2], to_lang=to_lang[2],
query=query.decode('utf-8')) query=query.decode())
return params return params

View File

@ -10,14 +10,11 @@
@parse url, title, content, magnetlink @parse url, title, content, magnetlink
""" """
from sys import version_info from urllib.parse import urljoin
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size from searx.utils import get_torrent_size
from searx.url_utils import urljoin
if version_info[0] == 3:
unicode = str
categories = ['videos', 'music', 'files'] categories = ['videos', 'music', 'files']
paging = True paging = True

View File

@ -14,8 +14,8 @@ import random
import string import string
from dateutil import parser from dateutil import parser
from json import loads from json import loads
from urllib.parse import urlencode
from lxml import html from lxml import html
from searx.url_utils import urlencode
from datetime import datetime from datetime import datetime
# engine dependent config # engine dependent config

View File

@ -9,10 +9,10 @@
# @stable yes # @stable yes
# @parse (general) url, title, content # @parse (general) url, title, content
from urllib.parse import urlencode
from lxml.html import fromstring from lxml.html import fromstring
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import eval_xpath from searx.utils import eval_xpath
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['general'] # TODO , 'images', 'music', 'videos', 'files' categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'

View File

@ -15,9 +15,9 @@
from lxml.html import fromstring from lxml.html import fromstring
from json import loads from json import loads
from urllib.parse import urlencode
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.poolrequests import get from searx.poolrequests import get
from searx.url_utils import urlencode
from searx.utils import match_language, eval_xpath from searx.utils import match_language, eval_xpath
# engine dependent config # engine dependent config

View File

@ -10,11 +10,11 @@ DuckDuckGo (definitions)
""" """
import json import json
from urllib.parse import urlencode
from lxml import html from lxml import html
from re import compile from re import compile
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases
from searx.url_utils import urlencode
from searx.utils import html_to_text, match_language from searx.utils import html_to_text, match_language
url = 'https://api.duckduckgo.com/'\ url = 'https://api.duckduckgo.com/'\

View File

@ -14,13 +14,13 @@
""" """
from json import loads from json import loads
from urllib.parse import urlencode
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.engines.duckduckgo import ( from searx.engines.duckduckgo import (
_fetch_supported_languages, supported_languages_url, _fetch_supported_languages, supported_languages_url,
get_region_code, language_aliases get_region_code, language_aliases
) )
from searx.poolrequests import get from searx.poolrequests import get
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['images'] categories = ['images']

View File

@ -10,9 +10,9 @@
from lxml import html, etree from lxml import html, etree
import re import re
from urllib.parse import quote, urljoin
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import eval_xpath from searx.utils import eval_xpath
from searx.url_utils import quote, urljoin
from searx import logger from searx import logger
categories = ['general'] categories = ['general']

View File

@ -10,8 +10,8 @@
""" """
from lxml import html from lxml import html
from urllib.parse import quote
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import quote
from searx.utils import eval_xpath from searx.utils import eval_xpath
categories = ['general'] categories = ['general']

View File

@ -9,9 +9,9 @@
@parse url, title, content @parse url, title, content
""" """
from urllib.parse import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['files'] categories = ['files']

View File

@ -1,9 +1,6 @@
from searx.url_utils import urlencode from html.parser import HTMLParser
from urllib.parse import urlencode
try:
from HTMLParser import HTMLParser
except:
from html.parser import HTMLParser
url = 'http://www.filecrop.com/' url = 'http://www.filecrop.com/'
search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}' # noqa

View File

@ -14,7 +14,7 @@
""" """
from json import loads from json import loads
from searx.url_utils import urlencode from urllib.parse import urlencode
categories = ['images'] categories = ['images']

View File

@ -15,8 +15,8 @@
from json import loads from json import loads
from time import time from time import time
import re import re
from urllib.parse import urlencode
from searx.engines import logger from searx.engines import logger
from searx.url_utils import urlencode
from searx.utils import ecma_unescape, html_to_text from searx.utils import ecma_unescape, html_to_text
logger = logger.getChild('flickr-noapi') logger = logger.getChild('flickr-noapi')
@ -117,10 +117,10 @@ def response(resp):
'img_format': img_format, 'img_format': img_format,
'template': 'images.html' 'template': 'images.html'
} }
result['author'] = author.encode('utf-8', 'ignore').decode('utf-8') result['author'] = author.encode(errors='ignore').decode()
result['source'] = source.encode('utf-8', 'ignore').decode('utf-8') result['source'] = source.encode(errors='ignore').decode()
result['title'] = title.encode('utf-8', 'ignore').decode('utf-8') result['title'] = title.encode(errors='ignore').decode()
result['content'] = content.encode('utf-8', 'ignore').decode('utf-8') result['content'] = content.encode(errors='ignore').decode()
results.append(result) results.append(result)
return results return results
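
The flickr-noapi hunk above keeps the encode()/decode() round trip but drops the explicit 'utf-8' arguments, since UTF-8 is already the default on Python 3; the point of the round trip is that errors='ignore' silently discards characters that cannot be represented in UTF-8 (for instance lone surrogates that can surface in scraped text). A small standalone sketch of that idea (the helper name is made up for illustration):

def drop_unencodable(text):
    # encode()/decode() default to UTF-8; errors='ignore' drops anything UTF-8 cannot encode
    return text.encode(errors='ignore').decode()

# drop_unencodable('caf\udce9 photo') == 'caf photo'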

View File

@ -10,13 +10,10 @@
@parse url, title, content, thumbnail, img_src @parse url, title, content, thumbnail, img_src
""" """
try: from html import escape
from cgi import escape from urllib.parse import urljoin, urlencode
except:
from html import escape
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urljoin, urlencode
# engine dependent config # engine dependent config
categories = ['it'] categories = ['it']

View File

@ -10,7 +10,7 @@ Frinkiac (Images)
""" """
from json import loads from json import loads
from searx.url_utils import urlencode from urllib.parse import urlencode
categories = ['images'] categories = ['images']

View File

@ -11,7 +11,7 @@ Genius
""" """
from json import loads from json import loads
from searx.url_utils import urlencode from urllib.parse import urlencode
from datetime import datetime from datetime import datetime
# engine dependent config # engine dependent config

View File

@ -11,9 +11,9 @@
@parse url, title @parse url, title
""" """
from urllib.parse import urlencode, urljoin
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode, urljoin
# engine dependent config # engine dependent config
categories = ['it'] categories = ['it']
@ -90,7 +90,7 @@ def request(query, params):
# if our language is hosted on the main site, we need to add its name # if our language is hosted on the main site, we need to add its name
# to the query in order to narrow the results to that language # to the query in order to narrow the results to that language
if language in main_langs: if language in main_langs:
query += b' (' + (main_langs[language]).encode('utf-8') + b')' query += b' (' + (main_langs[language]).encode() + b')'
# prepare the request parameters # prepare the request parameters
query = urlencode({'search': query}) query = urlencode({'search': query})

View File

@ -14,8 +14,8 @@
import re import re
from json import loads from json import loads
from urllib.parse import urlencode
# from searx import logger # from searx import logger
from searx.url_utils import urlencode
from searx.poolrequests import get from searx.poolrequests import get
# engine dependent config # engine dependent config

View File

@ -11,7 +11,7 @@
""" """
from json import loads from json import loads
from searx.url_utils import urlencode from urllib.parse import urlencode
# engine dependent config # engine dependent config
categories = ['it'] categories = ['it']

View File

@ -18,11 +18,11 @@ Definitions`_.
# pylint: disable=invalid-name, missing-function-docstring # pylint: disable=invalid-name, missing-function-docstring
from urllib.parse import urlencode, urlparse
from lxml import html from lxml import html
from flask_babel import gettext from flask_babel import gettext
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx import logger from searx import logger
from searx.url_utils import urlencode, urlparse
from searx.utils import match_language, eval_xpath from searx.utils import match_language, eval_xpath
logger = logger.getChild('google engine') logger = logger.getChild('google engine')

View File

@ -24,11 +24,10 @@ Definitions`_.
""" """
import urllib from urllib.parse import urlencode, urlparse, unquote
from lxml import html from lxml import html
from flask_babel import gettext from flask_babel import gettext
from searx import logger from searx import logger
from searx.url_utils import urlencode, urlparse
from searx.utils import eval_xpath from searx.utils import eval_xpath
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
@ -87,7 +86,7 @@ def scrap_img_by_id(script, data_id):
if 'gstatic.com/images' in line and data_id in line: if 'gstatic.com/images' in line and data_id in line:
url_line = _script[i + 1] url_line = _script[i + 1]
img_url = url_line.split('"')[1] img_url = url_line.split('"')[1]
img_url = urllib.parse.unquote(img_url.replace(r'\u00', r'%')) img_url = unquote(img_url.replace(r'\u00', r'%'))
return img_url return img_url

View File

@ -10,9 +10,9 @@
@parse url, title, content, publishedDate @parse url, title, content, publishedDate
""" """
from urllib.parse import urlencode
from lxml import html from lxml import html
from searx.engines.google import _fetch_supported_languages, supported_languages_url from searx.engines.google import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
from searx.utils import match_language from searx.utils import match_language
# search-url # search-url

View File

@ -12,9 +12,9 @@
from datetime import date, timedelta from datetime import date, timedelta
from json import loads from json import loads
from urllib.parse import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
import re import re
# engine dependent config # engine dependent config

View File

@ -12,15 +12,12 @@
# @todo embedded (needs some md5 from video page) # @todo embedded (needs some md5 from video page)
from json import loads from json import loads
from urllib.parse import urlencode
from lxml import html from lxml import html
from dateutil import parser from dateutil import parser
from html.parser import HTMLParser
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
try:
from HTMLParser import HTMLParser
except:
from html.parser import HTMLParser
# engine dependent config # engine dependent config
categories = ['videos'] categories = ['videos']

View File

@ -8,7 +8,7 @@
# @stable yes # @stable yes
# @parse url, title, content, publishedDate, thumbnail, embedded, author, length # @parse url, title, content, publishedDate, thumbnail, embedded, author, length
from searx.url_utils import quote_plus from urllib.parse import quote_plus
from dateutil import parser from dateutil import parser
import time import time

View File

@ -1,11 +1,8 @@
from collections import Iterable from collections import Iterable
from json import loads from json import loads
from sys import version_info from urllib.parse import urlencode
from searx.url_utils import urlencode
from searx.utils import to_string from searx.utils import to_string
if version_info[0] == 3:
unicode = str
search_url = None search_url = None
url_query = None url_query = None
@ -37,8 +34,6 @@ def iterate(iterable):
def is_iterable(obj): def is_iterable(obj):
if type(obj) == str: if type(obj) == str:
return False return False
if type(obj) == unicode:
return False
return isinstance(obj, Iterable) return isinstance(obj, Iterable)

View File

@ -12,9 +12,9 @@
from lxml import html from lxml import html
from operator import itemgetter from operator import itemgetter
from urllib.parse import quote, urljoin
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size, convert_str_to_int from searx.utils import get_torrent_size, convert_str_to_int
from searx.url_utils import quote, urljoin
# engine dependent config # engine dependent config
categories = ['videos', 'music', 'files'] categories = ['videos', 'music', 'files']

View File

@ -14,7 +14,7 @@
from json import loads from json import loads
from string import Formatter from string import Formatter
from searx.url_utils import urlencode, quote from urllib.parse import urlencode, quote
# engine dependent config # engine dependent config
categories = ['general'] categories = ['general']
@ -79,7 +79,7 @@ def response(resp):
if result.get('snippet', '').startswith('#REDIRECT'): if result.get('snippet', '').startswith('#REDIRECT'):
continue continue
url = base_url.format(language=resp.search_params['language']) +\ url = base_url.format(language=resp.search_params['language']) +\
'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')) 'wiki/' + quote(result['title'].replace(' ', '_').encode())
# append result # append result
results.append({'url': url, results.append({'url': url,

View File

@ -12,8 +12,7 @@ Microsoft Academic (Science)
from datetime import datetime from datetime import datetime
from json import loads from json import loads
from uuid import uuid4 from uuid import uuid4
from urllib.parse import urlencode
from searx.url_utils import urlencode
from searx.utils import html_to_text from searx.utils import html_to_text
categories = ['images'] categories = ['images']

View File

@ -12,7 +12,7 @@
from json import loads from json import loads
from dateutil import parser from dateutil import parser
from searx.url_utils import urlencode from urllib.parse import urlencode
# engine dependent config # engine dependent config
categories = ['music'] categories = ['music']

View File

@ -10,8 +10,8 @@
""" """
from lxml import html from lxml import html
from urllib.parse import urlencode
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
from searx.utils import get_torrent_size, int_or_zero from searx.utils import get_torrent_size, int_or_zero
# engine dependent config # engine dependent config

View File

@ -30,8 +30,8 @@ route_re = re.compile('(?:from )?(.+) to (.+)')
# do search-request # do search-request
def request(query, params): def request(query, params):
params['url'] = base_url + search_string.format(query=query.decode('utf-8')) params['url'] = base_url + search_string.format(query=query.decode())
params['route'] = route_re.match(query.decode('utf-8')) params['route'] = route_re.match(query.decode())
return params return params
@ -52,7 +52,7 @@ def response(resp):
if 'display_name' not in r: if 'display_name' not in r:
continue continue
title = r['display_name'] or u'' title = r['display_name'] or ''
osm_type = r.get('osm_type', r.get('type')) osm_type = r.get('osm_type', r.get('type'))
url = result_base_url.format(osm_type=osm_type, url = result_base_url.format(osm_type=osm_type,
osm_id=r['osm_id']) osm_id=r['osm_id'])
@ -64,7 +64,7 @@ def response(resp):
# if no geojson is found and osm_type is a node, add geojson Point # if no geojson is found and osm_type is a node, add geojson Point
if not geojson and osm_type == 'node': if not geojson and osm_type == 'node':
geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]} geojson = {'type': 'Point', 'coordinates': [r['lon'], r['lat']]}
address_raw = r.get('address') address_raw = r.get('address')
address = {} address = {}

View File

@ -14,7 +14,7 @@
from json import loads from json import loads
from datetime import datetime from datetime import datetime
from searx.url_utils import urlencode from urllib.parse import urlencode
from searx.utils import html_to_text from searx.utils import html_to_text
# engine dependent config # engine dependent config

View File

@ -11,8 +11,8 @@
""" """
from json import loads from json import loads
from urllib.parse import urlencode
from searx.utils import searx_useragent from searx.utils import searx_useragent
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['map'] categories = ['map']

View File

@ -11,7 +11,9 @@
from json import loads from json import loads
from datetime import datetime from datetime import datetime
from operator import itemgetter from operator import itemgetter
from searx.url_utils import quote
from urllib.parse import quote, urljoin
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size from searx.utils import get_torrent_size
# engine dependent config # engine dependent config
@ -62,8 +64,8 @@ def response(resp):
# parse results # parse results
for result in search_res: for result in search_res:
link = url + "description.php?id=" + result["id"] link = url + "description.php?id=" + result["id"]
magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + \ magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"]\
"&dn=" + result["name"] + "&tr=" + "&tr=".join(trackers) + "&tr=" + "&tr=".join(trackers)
params = { params = {
"url": link, "url": link,

View File

@ -14,7 +14,7 @@
from flask_babel import gettext from flask_babel import gettext
from lxml import etree from lxml import etree
from datetime import datetime from datetime import datetime
from searx.url_utils import urlencode from urllib.parse import urlencode
from searx.poolrequests import get from searx.poolrequests import get

View File

@ -12,9 +12,9 @@
from datetime import datetime from datetime import datetime
from json import loads from json import loads
from searx.utils import html_to_text from urllib.parse import urlencode
from searx.url_utils import urlencode from searx.utils import html_to_text, match_language
from searx.utils import match_language
# engine dependent config # engine dependent config
categories = None categories = None

View File

@ -12,7 +12,7 @@
import json import json
from datetime import datetime from datetime import datetime
from searx.url_utils import urlencode, urljoin, urlparse from urllib.parse import urlencode, urljoin, urlparse
# engine dependent config # engine dependent config
categories = ['general', 'images', 'news', 'social media'] categories = ['general', 'images', 'news', 'social media']

View File

@ -11,7 +11,7 @@
""" """
from json import loads, dumps from json import loads, dumps
from searx.utils import html_to_text from urllib.parse import html_to_text
# engine dependent config # engine dependent config
categories = ['science'] categories = ['science']
@ -29,7 +29,7 @@ def request(query, params):
params['url'] = search_url params['url'] = search_url
params['method'] = 'POST' params['method'] = 'POST'
params['headers']['Content-type'] = "application/json" params['headers']['Content-type'] = "application/json"
params['data'] = dumps({"query": query.decode('utf-8'), params['data'] = dumps({"query": query.decode(),
"searchField": "ALL", "searchField": "ALL",
"sortDirection": "ASC", "sortDirection": "ASC",
"sortOrder": "RELEVANCY", "sortOrder": "RELEVANCY",

View File

@ -11,7 +11,7 @@
""" """
from json import loads from json import loads
from searx.url_utils import urlencode from urllib.parse import urlencode
# engine dependent config # engine dependent config

View File

@ -11,7 +11,7 @@
""" """
from json import loads from json import loads
from searx.url_utils import urlencode from urllib.parse import urlencode
# engine dependent config # engine dependent config
categories = ['it'] categories = ['it']

View File

@ -11,7 +11,7 @@
from lxml import html from lxml import html
from json import loads from json import loads
from operator import itemgetter from operator import itemgetter
from searx.url_utils import quote, urljoin from urllib.parse import quote, urljoin
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text

View File

@ -14,14 +14,11 @@ import re
from json import loads from json import loads
from lxml import html from lxml import html
from dateutil import parser from dateutil import parser
from io import StringIO
from urllib.parse import quote_plus, urlencode
from searx import logger from searx import logger
from searx.poolrequests import get as http_get from searx.poolrequests import get as http_get
from searx.url_utils import quote_plus, urlencode
try:
from cStringIO import StringIO
except:
from io import StringIO
# engine dependent config # engine dependent config
categories = ['music'] categories = ['music']
@ -61,7 +58,7 @@ def get_client_id():
# gets app_js and searches for the clientid # gets app_js and searches for the clientid
response = http_get(app_js_url) response = http_get(app_js_url)
if response.ok: if response.ok:
cids = cid_re.search(response.content.decode("utf-8")) cids = cid_re.search(response.content.decode())
if cids is not None and len(cids.groups()): if cids is not None and len(cids.groups()):
return cids.groups()[0] return cids.groups()[0]
logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!") logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")

View File

@ -11,7 +11,7 @@
""" """
from json import loads from json import loads
from searx.url_utils import urlencode from urllib.parse import urlencode
import requests import requests
import base64 import base64
@ -39,8 +39,8 @@ def request(query, params):
'https://accounts.spotify.com/api/token', 'https://accounts.spotify.com/api/token',
data={'grant_type': 'client_credentials'}, data={'grant_type': 'client_credentials'},
headers={'Authorization': 'Basic ' + base64.b64encode( headers={'Authorization': 'Basic ' + base64.b64encode(
"{}:{}".format(api_client_id, api_client_secret).encode('utf-8') "{}:{}".format(api_client_id, api_client_secret).encode()
).decode('utf-8')} ).decode()}
) )
j = loads(r.text) j = loads(r.text)
params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))} params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))}
@ -59,7 +59,7 @@ def response(resp):
if result['type'] == 'track': if result['type'] == 'track':
title = result['name'] title = result['name']
url = result['external_urls']['spotify'] url = result['external_urls']['spotify']
content = u'{} - {} - {}'.format( content = '{} - {} - {}'.format(
result['artists'][0]['name'], result['artists'][0]['name'],
result['album']['name'], result['album']['name'],
result['name']) result['name'])
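
The Spotify hunk keeps the bytes round trip around base64.b64encode() and only drops the explicit 'utf-8' arguments: b64encode() accepts and returns bytes, so the "client_id:client_secret" string has to be encoded before and decoded after to end up with a str header value. A minimal sketch with placeholder credentials (not real API values):

import base64

def basic_auth_header(client_id, client_secret):
    # b64encode() works on bytes; encode()/decode() default to UTF-8 on Python 3
    token = base64.b64encode('{}:{}'.format(client_id, client_secret).encode()).decode()
    return {'Authorization': 'Basic ' + token}

# basic_auth_header('my-id', 'my-secret')['Authorization'] == 'Basic bXktaWQ6bXktc2VjcmV0'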

View File

@ -10,9 +10,9 @@
@parse url, title, content @parse url, title, content
""" """
from urllib.parse import urlencode, urljoin
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode, urljoin
# engine dependent config # engine dependent config
categories = ['it'] categories = ['it']

View File

@ -11,10 +11,10 @@
""" """
import re import re
from urllib.parse import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from datetime import datetime from datetime import datetime
from searx.url_utils import urlencode
from searx.utils import get_torrent_size, int_or_zero from searx.utils import get_torrent_size, int_or_zero
# engine dependent config # engine dependent config

View File

@ -12,10 +12,10 @@
""" """
import re import re
from urllib.parse import urlencode
from lxml import html from lxml import html
from datetime import datetime from datetime import datetime
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode
from searx.utils import get_torrent_size from searx.utils import get_torrent_size
# engine dependent config # engine dependent config

View File

@ -12,8 +12,8 @@ import re
from searx.utils import is_valid_lang from searx.utils import is_valid_lang
categories = ['general'] categories = ['general']
url = u'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
web_url = u'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
weight = 100 weight = 100
parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I) parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
@ -39,9 +39,9 @@ def request(query, params):
key_form = '' key_form = ''
params['url'] = url.format(from_lang=from_lang[1], params['url'] = url.format(from_lang=from_lang[1],
to_lang=to_lang[1], to_lang=to_lang[1],
query=query.decode('utf-8'), query=query.decode(),
key=key_form) key=key_form)
params['query'] = query.decode('utf-8') params['query'] = query.decode()
params['from_lang'] = from_lang params['from_lang'] = from_lang
params['to_lang'] = to_lang params['to_lang'] = to_lang

View File

@ -12,10 +12,10 @@
@todo publishedDate @todo publishedDate
""" """
from urllib.parse import urlencode, urljoin
from lxml import html from lxml import html
from datetime import datetime from datetime import datetime
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode, urljoin
# engine dependent config # engine dependent config
categories = ['social media'] categories = ['social media']

View File

@ -10,7 +10,7 @@
@parse url, title, img_src, thumbnail_src @parse url, title, img_src, thumbnail_src
""" """
from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
from json import loads from json import loads
url = 'https://unsplash.com/' url = 'https://unsplash.com/'

View File

@ -12,9 +12,9 @@
# @todo rewrite to api # @todo rewrite to api
# @todo set content-parameter with correct data # @todo set content-parameter with correct data
from urllib.parse import urlencode
from json import loads from json import loads
from dateutil import parser from dateutil import parser
from searx.url_utils import urlencode
# engine dependent config # engine dependent config
categories = ['videos'] categories = ['videos']

View File

@ -15,9 +15,9 @@ from searx import logger
from searx.poolrequests import get from searx.poolrequests import get
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
from searx.url_utils import urlencode
from searx.utils import match_language, eval_xpath from searx.utils import match_language, eval_xpath
from urllib.parse import urlencode
from json import loads from json import loads
from lxml.html import fromstring from lxml.html import fromstring
from lxml import etree from lxml import etree
@ -76,7 +76,7 @@ def request(query, params):
def response(resp): def response(resp):
results = [] results = []
htmlparser = etree.HTMLParser() htmlparser = etree.HTMLParser()
html = fromstring(resp.content.decode("utf-8"), parser=htmlparser) html = fromstring(resp.content.decode(), parser=htmlparser)
search_results = eval_xpath(html, wikidata_ids_xpath) search_results = eval_xpath(html, wikidata_ids_xpath)
if resp.search_params['language'].split('-')[0] == 'all': if resp.search_params['language'].split('-')[0] == 'all':
@ -89,7 +89,7 @@ def response(resp):
wikidata_id = search_result.split('/')[-1] wikidata_id = search_result.split('/')[-1]
url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language})) url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
htmlresponse = get(url) htmlresponse = get(url)
jsonresponse = loads(htmlresponse.content.decode("utf-8")) jsonresponse = loads(htmlresponse.content.decode())
results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'], htmlparser) results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'], htmlparser)
return results return results
@ -453,16 +453,16 @@ def get_geolink(result):
latitude, longitude = coordinates.split(',') latitude, longitude = coordinates.split(',')
# convert to decimal # convert to decimal
lat = int(latitude[:latitude.find(u'°')]) lat = int(latitude[:latitude.find('°')])
if latitude.find('\'') >= 0: if latitude.find('\'') >= 0:
lat += int(latitude[latitude.find(u'°') + 1:latitude.find('\'')] or 0) / 60.0 lat += int(latitude[latitude.find('°') + 1:latitude.find('\'')] or 0) / 60.0
if latitude.find('"') >= 0: if latitude.find('"') >= 0:
lat += float(latitude[latitude.find('\'') + 1:latitude.find('"')] or 0) / 3600.0 lat += float(latitude[latitude.find('\'') + 1:latitude.find('"')] or 0) / 3600.0
if latitude.find('S') >= 0: if latitude.find('S') >= 0:
lat *= -1 lat *= -1
lon = int(longitude[:longitude.find(u'°')]) lon = int(longitude[:longitude.find('°')])
if longitude.find('\'') >= 0: if longitude.find('\'') >= 0:
lon += int(longitude[longitude.find(u'°') + 1:longitude.find('\'')] or 0) / 60.0 lon += int(longitude[longitude.find('°') + 1:longitude.find('\'')] or 0) / 60.0
if longitude.find('"') >= 0: if longitude.find('"') >= 0:
lon += float(longitude[longitude.find('\'') + 1:longitude.find('"')] or 0) / 3600.0 lon += float(longitude[longitude.find('\'') + 1:longitude.find('"')] or 0) / 3600.0
if longitude.find('W') >= 0: if longitude.find('W') >= 0:
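
The get_geolink() hunk above only drops the u'' prefixes; the underlying logic, converting a degrees/minutes/seconds coordinate string into decimal degrees, is unchanged. A compact standalone sketch of that conversion (a hypothetical helper, not code from the commit):

def dms_to_decimal(dms):
    # dms is a string such as 52°31'12"N, in the form the wikidata engine parses
    value = int(dms[:dms.find('°')])
    if "'" in dms:
        value += int(dms[dms.find('°') + 1:dms.find("'")] or 0) / 60.0
    if '"' in dms:
        value += float(dms[dms.find("'") + 1:dms.find('"')] or 0) / 3600.0
    if dms[-1] in ('S', 'W'):
        value *= -1
    return value

# e.g. dms_to_decimal('52°31\'12"N') is approximately 52.52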

View File

@ -10,13 +10,13 @@
@parse url, infobox @parse url, infobox
""" """
from urllib.parse import quote
from json import loads from json import loads
from lxml.html import fromstring from lxml.html import fromstring
from searx.url_utils import quote
from searx.utils import match_language, searx_useragent from searx.utils import match_language, searx_useragent
# search-url # search-url
search_url = u'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'

View File

@ -9,7 +9,7 @@
# @parse url, infobox # @parse url, infobox
from lxml import etree from lxml import etree
from searx.url_utils import urlencode from urllib.parse import urlencode
# search-url # search-url
search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}' search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
@ -45,15 +45,15 @@ def request(query, params):
# replace private user area characters to make text legible # replace private user area characters to make text legible
def replace_pua_chars(text): def replace_pua_chars(text):
pua_chars = {u'\uf522': u'\u2192', # rigth arrow pua_chars = {'\uf522': '\u2192', # rigth arrow
u'\uf7b1': u'\u2115', # set of natural numbers '\uf7b1': '\u2115', # set of natural numbers
u'\uf7b4': u'\u211a', # set of rational numbers '\uf7b4': '\u211a', # set of rational numbers
u'\uf7b5': u'\u211d', # set of real numbers '\uf7b5': '\u211d', # set of real numbers
u'\uf7bd': u'\u2124', # set of integer numbers '\uf7bd': '\u2124', # set of integer numbers
u'\uf74c': 'd', # differential '\uf74c': 'd', # differential
u'\uf74d': u'\u212f', # euler's number '\uf74d': '\u212f', # euler's number
u'\uf74e': 'i', # imaginary number '\uf74e': 'i', # imaginary number
u'\uf7d9': '='} # equals sign '\uf7d9': '='} # equals sign
for k, v in pua_chars.items(): for k, v in pua_chars.items():
text = text.replace(k, v) text = text.replace(k, v)

View File

@ -10,9 +10,9 @@
from json import loads from json import loads
from time import time from time import time
from urllib.parse import urlencode
from searx.poolrequests import get as http_get from searx.poolrequests import get as http_get
from searx.url_utils import urlencode
# search-url # search-url
url = 'https://www.wolframalpha.com/' url = 'https://www.wolframalpha.com/'

View File

@ -11,7 +11,7 @@
""" """
from lxml import html from lxml import html
from searx.url_utils import urlencode, urljoin from urllib.parse import urlencode, urljoin
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
# engine dependent config # engine dependent config

View File

@ -1,7 +1,7 @@
from urllib.parse import unquote, urlencode, urljoin, urlparse
from lxml import html from lxml import html
from lxml.etree import _ElementStringResult, _ElementUnicodeResult from lxml.etree import _ElementStringResult, _ElementUnicodeResult
from searx.utils import html_to_text, eval_xpath from searx.utils import html_to_text, eval_xpath
from searx.url_utils import unquote, urlencode, urljoin, urlparse
search_url = None search_url = None
url_xpath = None url_xpath = None
@ -56,7 +56,7 @@ def extract_url(xpath_results, search_url):
if url.startswith('//'): if url.startswith('//'):
# add http or https to this kind of url //example.com/ # add http or https to this kind of url //example.com/
parsed_search_url = urlparse(search_url) parsed_search_url = urlparse(search_url)
url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url) url = '{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
elif url.startswith('/'): elif url.startswith('/'):
# fix relative url to the search engine # fix relative url to the search engine
url = urljoin(search_url, url) url = urljoin(search_url, url)
@ -86,7 +86,7 @@ def normalize_url(url):
p = parsed_url.path p = parsed_url.path
mark = p.find('/**') mark = p.find('/**')
if mark != -1: if mark != -1:
return unquote(p[mark + 3:]).decode('utf-8') return unquote(p[mark + 3:]).decode()
return url return url

View File

@ -14,7 +14,7 @@
from json import loads from json import loads
from dateutil import parser from dateutil import parser
from searx.url_utils import urlencode from urllib.parse import urlencode
from searx.utils import html_to_text from searx.utils import html_to_text

View File

@ -11,9 +11,9 @@
@parse url, title, content, suggestion @parse url, title, content, suggestion
""" """
from urllib.parse import unquote, urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text, extract_url from searx.engines.xpath import extract_text, extract_url
from searx.url_utils import unquote, urlencode
from searx.utils import match_language, eval_xpath from searx.utils import match_language, eval_xpath
# engine dependent config # engine dependent config

View File

@ -11,13 +11,13 @@
import re import re
from datetime import datetime, timedelta from datetime import datetime, timedelta
from urllib.parse import urlencode
from lxml import html from lxml import html
from searx.engines.xpath import extract_text, extract_url from searx.engines.xpath import extract_text, extract_url
from searx.engines.yahoo import ( from searx.engines.yahoo import (
parse_url, _fetch_supported_languages, supported_languages_url, language_aliases parse_url, _fetch_supported_languages, supported_languages_url, language_aliases
) )
from dateutil import parser from dateutil import parser
from searx.url_utils import urlencode
from searx.utils import match_language from searx.utils import match_language
# engine dependent config # engine dependent config
@ -58,7 +58,7 @@ def request(query, params):
def sanitize_url(url): def sanitize_url(url):
if ".yahoo.com/" in url: if ".yahoo.com/" in url:
return re.sub(u"\\;\\_ylt\\=.+$", "", url) return re.sub("\\;\\_ylt\\=.+$", "", url)
else: else:
return url return url
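The u"" prefix on the regex goes away because every literal is already text on Python 3; sanitize_url behaves exactly as before. A quick check with a made-up URL:

import re

def sanitize_url(url):
    if ".yahoo.com/" in url:
        # strip the ;_ylt=... tracking suffix Yahoo appends to result links
        return re.sub("\\;\\_ylt\\=.+$", "", url)
    return url

print(sanitize_url("https://r.search.yahoo.com/foo;_ylt=Awr9abc"))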


@ -9,9 +9,9 @@
@parse url, title, content @parse url, title, content
""" """
from urllib.parse import urlencode
from lxml import html from lxml import html
from searx import logger from searx import logger
from searx.url_utils import urlencode
logger = logger.getChild('yandex engine') logger = logger.getChild('yandex engine')


@ -11,8 +11,8 @@
from lxml import html from lxml import html
from operator import itemgetter from operator import itemgetter
from datetime import datetime from datetime import datetime
from urllib.parse import quote
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import quote
from searx.utils import get_torrent_size from searx.utils import get_torrent_size
from searx.poolrequests import get as http_get from searx.poolrequests import get as http_get


@ -10,7 +10,7 @@
from json import loads from json import loads
from dateutil import parser from dateutil import parser
from searx.url_utils import urlencode from urllib.parse import urlencode
# engine dependent config # engine dependent config
categories = ['videos', 'music'] categories = ['videos', 'music']


@ -10,9 +10,9 @@
from functools import reduce from functools import reduce
from json import loads from json import loads
from urllib.parse import quote_plus
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.utils import list_get from searx.utils import list_get
from searx.url_utils import quote_plus
# engine dependent config # engine dependent config
categories = ['videos', 'music'] categories = ['videos', 'music']


@ -23,7 +23,7 @@ def get_bang_url(search_query):
""" """
if search_query.external_bang: if search_query.external_bang:
query = search_query.query.decode('utf-8', 'ignore') query = search_query.query.decode(errors='ignore')
bang = _get_bang(search_query.external_bang) bang = _get_bang(search_query.external_bang)
if bang and query: if bang and query:


@ -3,73 +3,73 @@
# this file is generated automatically by utils/update_search_languages.py # this file is generated automatically by utils/update_search_languages.py
language_codes = ( language_codes = (
(u"af-NA", u"Afrikaans", u"", u"Afrikaans"), ("af-NA", "Afrikaans", "", "Afrikaans"),
(u"ar-SA", u"العربية", u"", u"Arabic"), ("ar-SA", "العربية", "", "Arabic"),
(u"be-BY", u"Беларуская", u"", u"Belarusian"), ("be-BY", "Беларуская", "", "Belarusian"),
(u"bg-BG", u"Български", u"", u"Bulgarian"), ("bg-BG", "Български", "", "Bulgarian"),
(u"ca-AD", u"Català", u"", u"Catalan"), ("ca-AD", "Català", "", "Catalan"),
(u"cs-CZ", u"Čeština", u"", u"Czech"), ("cs-CZ", "Čeština", "", "Czech"),
(u"da-DK", u"Dansk", u"", u"Danish"), ("da-DK", "Dansk", "", "Danish"),
(u"de", u"Deutsch", u"", u"German"), ("de", "Deutsch", "", "German"),
(u"de-AT", u"Deutsch", u"Österreich", u"German"), ("de-AT", "Deutsch", "Österreich", "German"),
(u"de-CH", u"Deutsch", u"Schweiz", u"German"), ("de-CH", "Deutsch", "Schweiz", "German"),
(u"de-DE", u"Deutsch", u"Deutschland", u"German"), ("de-DE", "Deutsch", "Deutschland", "German"),
(u"el-GR", u"Ελληνικά", u"", u"Greek"), ("el-GR", "Ελληνικά", "", "Greek"),
(u"en", u"English", u"", u"English"), ("en", "English", "", "English"),
(u"en-AU", u"English", u"Australia", u"English"), ("en-AU", "English", "Australia", "English"),
(u"en-CA", u"English", u"Canada", u"English"), ("en-CA", "English", "Canada", "English"),
(u"en-GB", u"English", u"United Kingdom", u"English"), ("en-GB", "English", "United Kingdom", "English"),
(u"en-IE", u"English", u"Ireland", u"English"), ("en-IE", "English", "Ireland", "English"),
(u"en-IN", u"English", u"India", u"English"), ("en-IN", "English", "India", "English"),
(u"en-NZ", u"English", u"New Zealand", u"English"), ("en-NZ", "English", "New Zealand", "English"),
(u"en-PH", u"English", u"Philippines", u"English"), ("en-PH", "English", "Philippines", "English"),
(u"en-SG", u"English", u"Singapore", u"English"), ("en-SG", "English", "Singapore", "English"),
(u"en-US", u"English", u"United States", u"English"), ("en-US", "English", "United States", "English"),
(u"es", u"Español", u"", u"Spanish"), ("es", "Español", "", "Spanish"),
(u"es-AR", u"Español", u"Argentina", u"Spanish"), ("es-AR", "Español", "Argentina", "Spanish"),
(u"es-CL", u"Español", u"Chile", u"Spanish"), ("es-CL", "Español", "Chile", "Spanish"),
(u"es-ES", u"Español", u"España", u"Spanish"), ("es-ES", "Español", "España", "Spanish"),
(u"es-MX", u"Español", u"México", u"Spanish"), ("es-MX", "Español", "México", "Spanish"),
(u"et-EE", u"Eesti", u"", u"Estonian"), ("et-EE", "Eesti", "", "Estonian"),
(u"fa-IR", u"فارسی", u"", u"Persian"), ("fa-IR", "فارسی", "", "Persian"),
(u"fi-FI", u"Suomi", u"", u"Finnish"), ("fi-FI", "Suomi", "", "Finnish"),
(u"fr", u"Français", u"", u"French"), ("fr", "Français", "", "French"),
(u"fr-BE", u"Français", u"Belgique", u"French"), ("fr-BE", "Français", "Belgique", "French"),
(u"fr-CA", u"Français", u"Canada", u"French"), ("fr-CA", "Français", "Canada", "French"),
(u"fr-CH", u"Français", u"Suisse", u"French"), ("fr-CH", "Français", "Suisse", "French"),
(u"fr-FR", u"Français", u"France", u"French"), ("fr-FR", "Français", "France", "French"),
(u"he-IL", u"עברית", u"", u"Hebrew"), ("he-IL", "עברית", "", "Hebrew"),
(u"hr-HR", u"Hrvatski", u"", u"Croatian"), ("hr-HR", "Hrvatski", "", "Croatian"),
(u"hu-HU", u"Magyar", u"", u"Hungarian"), ("hu-HU", "Magyar", "", "Hungarian"),
(u"hy-AM", u"Հայերեն", u"", u"Armenian"), ("hy-AM", "Հայերեն", "", "Armenian"),
(u"id-ID", u"Indonesia", u"", u"Indonesian"), ("id-ID", "Indonesia", "", "Indonesian"),
(u"is-IS", u"Íslenska", u"", u"Icelandic"), ("is-IS", "Íslenska", "", "Icelandic"),
(u"it-IT", u"Italiano", u"", u"Italian"), ("it-IT", "Italiano", "", "Italian"),
(u"ja-JP", u"日本語", u"", u"Japanese"), ("ja-JP", "日本語", "", "Japanese"),
(u"ko-KR", u"한국어", u"", u"Korean"), ("ko-KR", "한국어", "", "Korean"),
(u"lt-LT", u"Lietuvių", u"", u"Lithuanian"), ("lt-LT", "Lietuvių", "", "Lithuanian"),
(u"lv-LV", u"Latviešu", u"", u"Latvian"), ("lv-LV", "Latviešu", "", "Latvian"),
(u"ms-MY", u"Melayu", u"", u"Malay"), ("ms-MY", "Melayu", "", "Malay"),
(u"nb-NO", u"Norsk Bokmål", u"", u"Norwegian Bokmål"), ("nb-NO", "Norsk Bokmål", "", "Norwegian Bokmål"),
(u"nl", u"Nederlands", u"", u"Dutch"), ("nl", "Nederlands", "", "Dutch"),
(u"nl-BE", u"Nederlands", u"België", u"Dutch"), ("nl-BE", "Nederlands", "België", "Dutch"),
(u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"), ("nl-NL", "Nederlands", "Nederland", "Dutch"),
(u"pl-PL", u"Polski", u"", u"Polish"), ("pl-PL", "Polski", "", "Polish"),
(u"pt", u"Português", u"", u"Portuguese"), ("pt", "Português", "", "Portuguese"),
(u"pt-BR", u"Português", u"Brasil", u"Portuguese"), ("pt-BR", "Português", "Brasil", "Portuguese"),
(u"pt-PT", u"Português", u"Portugal", u"Portuguese"), ("pt-PT", "Português", "Portugal", "Portuguese"),
(u"ro-RO", u"Română", u"", u"Romanian"), ("ro-RO", "Română", "", "Romanian"),
(u"ru-RU", u"Русский", u"", u"Russian"), ("ru-RU", "Русский", "", "Russian"),
(u"sk-SK", u"Slovenčina", u"", u"Slovak"), ("sk-SK", "Slovenčina", "", "Slovak"),
(u"sl-SI", u"Slovenščina", u"", u"Slovenian"), ("sl-SI", "Slovenščina", "", "Slovenian"),
(u"sr-RS", u"Srpski", u"", u"Serbian"), ("sr-RS", "Srpski", "", "Serbian"),
(u"sv-SE", u"Svenska", u"", u"Swedish"), ("sv-SE", "Svenska", "", "Swedish"),
(u"sw-KE", u"Kiswahili", u"", u"Swahili"), ("sw-KE", "Kiswahili", "", "Swahili"),
(u"th-TH", u"ไทย", u"", u"Thai"), ("th-TH", "ไทย", "", "Thai"),
(u"tr-TR", u"Türkçe", u"", u"Turkish"), ("tr-TR", "Türkçe", "", "Turkish"),
(u"uk-UA", u"Українська", u"", u"Ukrainian"), ("uk-UA", "Українська", "", "Ukrainian"),
(u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"), ("vi-VN", "Tiếng Việt", "", "Vietnamese"),
(u"zh", u"中文", u"", u"Chinese"), ("zh", "中文", "", "Chinese"),
(u"zh-CN", u"中文", u"中国", u"Chinese"), ("zh-CN", "中文", "中国", "Chinese"),
(u"zh-TW", u"中文", u"台灣", u"Chinese") ("zh-TW", "中文", "台灣", "Chinese")
) )
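Semantically nothing changes in the generated table: without the u prefixes the entries are the same text tuples. Sketch with a two-entry excerpt of the data above:

language_codes = (
    ("de-AT", "Deutsch", "Österreich", "German"),
    ("zh-TW", "中文", "台灣", "Chinese"),
)

for code, name, country, english_name in language_codes:
    # every field is a plain str on Python 3; there is no separate unicode type
    print(code, name, country or '-', english_name)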


@ -20,13 +20,10 @@ from importlib import import_module
from os import listdir, makedirs, remove, stat, utime from os import listdir, makedirs, remove, stat, utime
from os.path import abspath, basename, dirname, exists, join from os.path import abspath, basename, dirname, exists, join
from shutil import copyfile from shutil import copyfile
from sys import version_info
from traceback import print_exc from traceback import print_exc
from searx import logger, settings, static_path from searx import logger, settings, static_path
if version_info[0] == 3:
unicode = str
logger = logger.getChild('plugins') logger = logger.getChild('plugins')
@ -38,8 +35,8 @@ from searx.plugins import (oa_doi_rewrite,
tracker_url_remover, tracker_url_remover,
vim_hotkeys) vim_hotkeys)
required_attrs = (('name', (str, unicode)), required_attrs = (('name', str),
('description', (str, unicode)), ('description', str),
('default_on', bool)) ('default_on', bool))
optional_attrs = (('js_dependencies', tuple), optional_attrs = (('js_dependencies', tuple),
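With a single string type left, the attribute whitelist no longer needs the (str, unicode) pair. A self-contained sketch of the check it feeds, using a made-up plugin class:

required_attrs = (('name', str),
                  ('description', str),
                  ('default_on', bool))

class ExamplePlugin:
    name = 'example'
    description = 'illustrative plugin, not part of searx'
    default_on = False

for attr_name, attr_type in required_attrs:
    # every required attribute must exist and have the declared type
    assert isinstance(getattr(ExamplePlugin, attr_name), attr_type)
print('all required attributes present and correctly typed')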


@ -16,17 +16,14 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
''' '''
import re import re
import sys from urllib.parse import urlparse
from lxml import etree from lxml import etree
from os import listdir, environ from os import listdir, environ
from os.path import isfile, isdir, join from os.path import isfile, isdir, join
from searx.plugins import logger from searx.plugins import logger
from flask_babel import gettext from flask_babel import gettext
from searx import searx_dir from searx import searx_dir
from searx.url_utils import urlparse
if sys.version_info[0] == 3:
unicode = str
name = "HTTPS rewrite" name = "HTTPS rewrite"
description = gettext('Rewrite HTTP links to HTTPS if possible') description = gettext('Rewrite HTTP links to HTTPS if possible')


@ -1,6 +1,6 @@
from urllib.parse import urlparse, parse_qsl
from flask_babel import gettext from flask_babel import gettext
import re import re
from searx.url_utils import urlparse, parse_qsl
from searx import settings from searx import settings


@ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
from flask_babel import gettext from flask_babel import gettext
import re import re
from searx.url_utils import urlunparse, parse_qsl, urlencode from urllib.parse import urlunparse, parse_qsl, urlencode
regexes = {re.compile(r'utm_[^&]+'), regexes = {re.compile(r'utm_[^&]+'),
re.compile(r'(wkey|wemail)[^&]*'), re.compile(r'(wkey|wemail)[^&]*'),
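These imports feed the plugin's query-string rewriting; a self-contained sketch of that idea (the real plugin matches more patterns and operates on result dicts, so everything below is illustrative):

import re
from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse

regexes = {re.compile(r'utm_[^&]+'),
           re.compile(r'(wkey|wemail)[^&]*')}

def remove_trackers(url):
    parsed = urlparse(url)
    kept = [(k, v) for k, v in parse_qsl(parsed.query)
            if not any(r.match(k) for r in regexes)]
    return urlunparse(parsed._replace(query=urlencode(kept)))

print(remove_trackers('https://example.org/a?id=1&utm_source=mail'))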


@ -6,16 +6,11 @@
from base64 import urlsafe_b64encode, urlsafe_b64decode from base64 import urlsafe_b64encode, urlsafe_b64decode
from zlib import compress, decompress from zlib import compress, decompress
from sys import version from urllib.parse import parse_qs, urlencode
from searx import settings, autocomplete from searx import settings, autocomplete
from searx.languages import language_codes as languages from searx.languages import language_codes as languages
from searx.utils import match_language from searx.utils import match_language
from searx.url_utils import parse_qs, urlencode
if version[0] == '3':
# pylint: disable=invalid-name
unicode = str
COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5 # 5 years COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5 # 5 years
@ -402,14 +397,14 @@ class Preferences(object):
settings_kv['tokens'] = ','.join(self.tokens.values) settings_kv['tokens'] = ','.join(self.tokens.values)
return urlsafe_b64encode(compress(urlencode(settings_kv).encode('utf-8'))).decode('utf-8') return urlsafe_b64encode(compress(urlencode(settings_kv).encode())).decode()
def parse_encoded_data(self, input_data): def parse_encoded_data(self, input_data):
"""parse (base64) preferences from request (``flask.request.form['preferences']``)""" """parse (base64) preferences from request (``flask.request.form['preferences']``)"""
decoded_data = decompress(urlsafe_b64decode(input_data.encode('utf-8'))) decoded_data = decompress(urlsafe_b64decode(input_data.encode()))
dict_data = {} dict_data = {}
for x, y in parse_qs(decoded_data).items(): for x, y in parse_qs(decoded_data).items():
dict_data[x.decode('utf8')] = y[0].decode('utf8') dict_data[x.decode()] = y[0].decode()
self.parse_dict(dict_data) self.parse_dict(dict_data)
def parse_dict(self, input_data): def parse_dict(self, input_data):
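The hunk above reads best as a round trip; the same calls outside Flask, with a made-up settings dict:

from base64 import urlsafe_b64encode, urlsafe_b64decode
from urllib.parse import parse_qs, urlencode
from zlib import compress, decompress

settings_kv = {'language': 'en-US', 'theme': 'oscar'}

# encode: urlencode -> UTF-8 bytes -> zlib -> urlsafe base64 -> str
encoded = urlsafe_b64encode(compress(urlencode(settings_kv).encode())).decode()

# decode: reverse the steps; parse_qs accepts bytes and returns bytes
decoded = decompress(urlsafe_b64decode(encoded.encode()))
restored = {k.decode(): v[0].decode() for k, v in parse_qs(decoded).items()}

assert restored == settings_kv
print(encoded)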


@ -17,15 +17,13 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2014 by Thomas Pointhuber, <thomas.pointhuber@gmx.at> (C) 2014 by Thomas Pointhuber, <thomas.pointhuber@gmx.at>
''' '''
import re
from searx.languages import language_codes from searx.languages import language_codes
from searx.engines import ( from searx.engines import (
categories, engines, engine_shortcuts categories, engines, engine_shortcuts
) )
import re
import sys
if sys.version_info[0] == 3:
unicode = str
VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$') VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
@ -93,7 +91,7 @@ class RawTextQuery(object):
# check if any language-code is equal with # check if any language-code is equal with
# declared language-codes # declared language-codes
for lc in language_codes: for lc in language_codes:
lang_id, lang_name, country, english_name = map(unicode.lower, lc) lang_id, lang_name, country, english_name = map(str.lower, lc)
# if correct language-code is found # if correct language-code is found
# set it as new search-language # set it as new search-language
@ -177,7 +175,7 @@ class RawTextQuery(object):
def getFullQuery(self): def getFullQuery(self):
# get full querry including whitespaces # get full querry including whitespaces
return u''.join(self.query_parts) return ''.join(self.query_parts)
class SearchQuery(object): class SearchQuery(object):
@ -185,7 +183,7 @@ class SearchQuery(object):
def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range, def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range,
timeout_limit=None, preferences=None, external_bang=None): timeout_limit=None, preferences=None, external_bang=None):
self.query = query.encode('utf-8') self.query = query.encode()
self.engines = engines self.engines = engines
self.categories = categories self.categories = categories
self.lang = lang self.lang = lang


@ -1,14 +1,11 @@
import re import re
import sys
from collections import defaultdict from collections import defaultdict
from operator import itemgetter from operator import itemgetter
from threading import RLock from threading import RLock
from urllib.parse import urlparse, unquote
from searx import logger from searx import logger
from searx.engines import engines from searx.engines import engines
from searx.url_utils import urlparse, unquote
if sys.version_info[0] == 3:
basestring = str
CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U) CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
@ -16,7 +13,7 @@ WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
# return the meaningful length of the content for a result # return the meaningful length of the content for a result
def result_content_len(content): def result_content_len(content):
if isinstance(content, basestring): if isinstance(content, str):
return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content)) return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content))
else: else:
return 0 return 0
@ -161,11 +158,11 @@ class ResultContainer(object):
self._number_of_results.append(result['number_of_results']) self._number_of_results.append(result['number_of_results'])
else: else:
# standard result (url, title, content) # standard result (url, title, content)
if 'url' in result and not isinstance(result['url'], basestring): if 'url' in result and not isinstance(result['url'], str):
logger.debug('result: invalid URL: %s', str(result)) logger.debug('result: invalid URL: %s', str(result))
elif 'title' in result and not isinstance(result['title'], basestring): elif 'title' in result and not isinstance(result['title'], str):
logger.debug('result: invalid title: %s', str(result)) logger.debug('result: invalid title: %s', str(result))
elif 'content' in result and not isinstance(result['content'], basestring): elif 'content' in result and not isinstance(result['content'], str):
logger.debug('result: invalid content: %s', str(result)) logger.debug('result: invalid content: %s', str(result))
else: else:
self._merge_result(result, standard_result_count + 1) self._merge_result(result, standard_result_count + 1)
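basestring has no Python 3 equivalent, so every type check collapses to plain str. Minimal sketch of the content-length helper from the first hunk:

import re

CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)

def result_content_len(content):
    # only text contributes to the ranking length; anything else counts as 0
    if isinstance(content, str):
        return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content))
    return 0

print(result_content_len('Hello, world!'), result_content_len(None))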


@ -20,8 +20,8 @@ import sys
import threading import threading
from time import time from time import time
from uuid import uuid4 from uuid import uuid4
from _thread import start_new_thread
import six
from flask_babel import gettext from flask_babel import gettext
import requests.exceptions import requests.exceptions
import searx.poolrequests as requests_lib import searx.poolrequests as requests_lib
@ -37,13 +37,6 @@ from searx import logger
from searx.plugins import plugins from searx.plugins import plugins
from searx.exceptions import SearxParameterException from searx.exceptions import SearxParameterException
try:
from thread import start_new_thread
except:
from _thread import start_new_thread
if sys.version_info[0] == 3:
unicode = str
logger = logger.getChild('search') logger = logger.getChild('search')
@ -355,11 +348,11 @@ def get_search_query_from_webapp(preferences, form):
load_default_categories = True load_default_categories = True
for pd_name, pd in form.items(): for pd_name, pd in form.items():
if pd_name == 'categories': if pd_name == 'categories':
query_categories.extend(categ for categ in map(unicode.strip, pd.split(',')) if categ in categories) query_categories.extend(categ for categ in map(str.strip, pd.split(',')) if categ in categories)
elif pd_name == 'engines': elif pd_name == 'engines':
pd_engines = [{'category': engines[engine].categories[0], pd_engines = [{'category': engines[engine].categories[0],
'name': engine} 'name': engine}
for engine in map(unicode.strip, pd.split(',')) if engine in engines] for engine in map(str.strip, pd.split(',')) if engine in engines]
if pd_engines: if pd_engines:
query_engines.extend(pd_engines) query_engines.extend(pd_engines)
load_default_categories = False load_default_categories = False
@ -434,7 +427,7 @@ class Search(object):
# This means there was a valid bang and the # This means there was a valid bang and the
# rest of the search does not need to be continued # rest of the search does not need to be continued
if isinstance(self.result_container.redirect_url, six.string_types): if isinstance(self.result_container.redirect_url, str):
return self.result_container return self.result_container
# start time # start time
start_time = time() start_time = time()


@ -17,7 +17,7 @@ from unittest2 import TestCase
class SearxTestLayer: class SearxTestLayer:
"""Base layer for non-robot tests.""" """Base layer for non-robot tests."""
__name__ = u'SearxTestLayer' __name__ = 'SearxTestLayer'
@classmethod @classmethod
def setUp(cls): def setUp(cls):
@ -66,7 +66,7 @@ class SearxRobotLayer():
stderr=subprocess.STDOUT stderr=subprocess.STDOUT
) )
if hasattr(self.server.stdout, 'read1'): if hasattr(self.server.stdout, 'read1'):
print(self.server.stdout.read1(1024).decode('utf-8')) print(self.server.stdout.read1(1024).decode())
def tearDown(self): def tearDown(self):
os.kill(self.server.pid, 9) os.kill(self.server.pid, 9)


@ -1,30 +0,0 @@
from sys import version_info
if version_info[0] == 2:
from urllib import quote, quote_plus, unquote, urlencode
from urlparse import parse_qs, parse_qsl, urljoin, urlparse, urlunparse, ParseResult
else:
from urllib.parse import (
parse_qs,
parse_qsl,
quote,
quote_plus,
unquote,
urlencode,
urljoin,
urlparse,
urlunparse,
ParseResult
)
__export__ = (parse_qs,
parse_qsl,
quote,
quote_plus,
unquote,
urlencode,
urljoin,
urlparse,
urlunparse,
ParseResult)
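Since searx/url_utils.py existed only to paper over the Python 2/3 split, deleting it leaves a one-line change at every call site. Before/after sketch:

# before: from searx.url_utils import urlencode, urlparse
from urllib.parse import urlencode, urlparse

parts = urlparse('https://example.org/search?' + urlencode({'q': 'searx'}))
print(parts.netloc, parts.query)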


@ -1,21 +1,22 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import os
import sys
import csv import csv
import hashlib import hashlib
import hmac import hmac
import os
import re import re
import json
from babel.core import get_global
from babel.dates import format_date
from codecs import getincrementalencoder from codecs import getincrementalencoder
from imp import load_source from imp import load_source
from numbers import Number from numbers import Number
from os.path import splitext, join from os.path import splitext, join
from io import open from io import open, StringIO
from random import choice from random import choice
from html.parser import HTMLParser
from lxml.etree import XPath from lxml.etree import XPath
import sys from babel.core import get_global
import json from babel.dates import format_date
from searx import settings from searx import settings
from searx.version import VERSION_STRING from searx.version import VERSION_STRING
@ -23,23 +24,6 @@ from searx.languages import language_codes
from searx import settings from searx import settings
from searx import logger from searx import logger
try:
from cStringIO import StringIO
except:
from io import StringIO
try:
from HTMLParser import HTMLParser
except:
from html.parser import HTMLParser
if sys.version_info[0] == 3:
unichr = chr
unicode = str
IS_PY2 = False
basestring = str
else:
IS_PY2 = True
logger = logger.getChild('utils') logger = logger.getChild('utils')
@ -75,19 +59,19 @@ def highlight_content(content, query):
if content.find('<') != -1: if content.find('<') != -1:
return content return content
query = query.decode('utf-8') query = query.decode()
if content.lower().find(query.lower()) > -1: if content.lower().find(query.lower()) > -1:
query_regex = u'({0})'.format(re.escape(query)) query_regex = '({0})'.format(re.escape(query))
content = re.sub(query_regex, '<span class="highlight">\\1</span>', content = re.sub(query_regex, '<span class="highlight">\\1</span>',
content, flags=re.I | re.U) content, flags=re.I | re.U)
else: else:
regex_parts = [] regex_parts = []
for chunk in query.split(): for chunk in query.split():
if len(chunk) == 1: if len(chunk) == 1:
regex_parts.append(u'\\W+{0}\\W+'.format(re.escape(chunk))) regex_parts.append('\\W+{0}\\W+'.format(re.escape(chunk)))
else: else:
regex_parts.append(u'{0}'.format(re.escape(chunk))) regex_parts.append('{0}'.format(re.escape(chunk)))
query_regex = u'({0})'.format('|'.join(regex_parts)) query_regex = '({0})'.format('|'.join(regex_parts))
content = re.sub(query_regex, '<span class="highlight">\\1</span>', content = re.sub(query_regex, '<span class="highlight">\\1</span>',
content, flags=re.I | re.U) content, flags=re.I | re.U)
@ -124,21 +108,21 @@ class HTMLTextExtractor(HTMLParser):
def handle_charref(self, number): def handle_charref(self, number):
if not self.is_valid_tag(): if not self.is_valid_tag():
return return
if number[0] in (u'x', u'X'): if number[0] in ('x', 'X'):
codepoint = int(number[1:], 16) codepoint = int(number[1:], 16)
else: else:
codepoint = int(number) codepoint = int(number)
self.result.append(unichr(codepoint)) self.result.append(chr(codepoint))
def handle_entityref(self, name): def handle_entityref(self, name):
if not self.is_valid_tag(): if not self.is_valid_tag():
return return
# codepoint = htmlentitydefs.name2codepoint[name] # codepoint = htmlentitydefs.name2codepoint[name]
# self.result.append(unichr(codepoint)) # self.result.append(chr(codepoint))
self.result.append(name) self.result.append(name)
def get_text(self): def get_text(self):
return u''.join(self.result).strip() return ''.join(self.result).strip()
def html_to_text(html): def html_to_text(html):
@ -163,22 +147,14 @@ class UnicodeWriter:
self.encoder = getincrementalencoder(encoding)() self.encoder = getincrementalencoder(encoding)()
def writerow(self, row): def writerow(self, row):
if IS_PY2:
row = [s.encode("utf-8") if hasattr(s, 'encode') else s for s in row]
self.writer.writerow(row) self.writer.writerow(row)
# Fetch UTF-8 output from the queue ... # Fetch UTF-8 output from the queue ...
data = self.queue.getvalue() data = self.queue.getvalue()
if IS_PY2: data = data.strip('\x00')
data = data.decode("utf-8")
else:
data = data.strip('\x00')
# ... and reencode it into the target encoding # ... and reencode it into the target encoding
data = self.encoder.encode(data) data = self.encoder.encode(data)
# write to the target stream # write to the target stream
if IS_PY2: self.stream.write(data.decode())
self.stream.write(data)
else:
self.stream.write(data.decode("utf-8"))
# empty queue # empty queue
self.queue.truncate(0) self.queue.truncate(0)
@ -253,7 +229,7 @@ def dict_subset(d, properties):
def prettify_url(url, max_length=74): def prettify_url(url, max_length=74):
if len(url) > max_length: if len(url) > max_length:
chunk_len = int(max_length / 2 + 1) chunk_len = int(max_length / 2 + 1)
return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:]) return '{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
else: else:
return url return url
@ -310,7 +286,7 @@ def int_or_zero(num):
def is_valid_lang(lang): def is_valid_lang(lang):
is_abbr = (len(lang) == 2) is_abbr = (len(lang) == 2)
lang = lang.lower().decode('utf-8') lang = lang.lower().decode()
if is_abbr: if is_abbr:
for l in language_codes: for l in language_codes:
if l[0][:2] == lang: if l[0][:2] == lang:
@ -407,17 +383,14 @@ def new_hmac(secret_key, url):
secret_key_bytes = secret_key secret_key_bytes = secret_key
else: else:
raise err raise err
if sys.version_info[0] == 2: return hmac.new(secret_key_bytes, url, hashlib.sha256).hexdigest()
return hmac.new(bytes(secret_key), url, hashlib.sha256).hexdigest()
else:
return hmac.new(secret_key_bytes, url, hashlib.sha256).hexdigest()
def to_string(obj): def to_string(obj):
if isinstance(obj, basestring): if isinstance(obj, str):
return obj return obj
if isinstance(obj, Number): if isinstance(obj, Number):
return unicode(obj) return str(obj)
if hasattr(obj, '__str__'): if hasattr(obj, '__str__'):
return obj.__str__() return obj.__str__()
if hasattr(obj, '__repr__'): if hasattr(obj, '__repr__'):
@ -433,9 +406,9 @@ def ecma_unescape(s):
""" """
# s = unicode(s) # s = unicode(s)
# "%u5409" becomes "吉" # "%u5409" becomes "吉"
s = ecma_unescape4_re.sub(lambda e: unichr(int(e.group(1), 16)), s) s = ecma_unescape4_re.sub(lambda e: chr(int(e.group(1), 16)), s)
# "%20" becomes " ", "%F3" becomes "ó" # "%20" becomes " ", "%F3" becomes "ó"
s = ecma_unescape2_re.sub(lambda e: unichr(int(e.group(1), 16)), s) s = ecma_unescape2_re.sub(lambda e: chr(int(e.group(1), 16)), s)
return s return s
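unichr() is simply chr() on Python 3. A runnable sketch of the ecma_unescape substitutions above; the two regex definitions are assumptions (they are defined elsewhere in utils.py) written to match the behaviour the comments describe:

import re

ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})')
ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})')

def ecma_unescape(s):
    # "%u5409" becomes "吉"
    s = ecma_unescape4_re.sub(lambda e: chr(int(e.group(1), 16)), s)
    # "%20" becomes " ", "%F3" becomes "ó"
    s = ecma_unescape2_re.sub(lambda e: chr(int(e.group(1), 16)), s)
    return s

print(ecma_unescape('%u5409%20is%20%F3k'))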


@ -17,37 +17,35 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
(C) 2013- by Adam Tauber, <asciimoo@gmail.com> (C) 2013- by Adam Tauber, <asciimoo@gmail.com>
''' '''
import sys
if sys.version_info[0] < 3:
print('\033[1;31m Python2 is no longer supported\033[0m')
exit(1)
if __name__ == '__main__': if __name__ == '__main__':
from sys import path
from os.path import realpath, dirname from os.path import realpath, dirname
path.append(realpath(dirname(realpath(__file__)) + '/../')) sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
import hashlib import hashlib
import hmac import hmac
import json import json
import os import os
import sys
import requests import requests
from searx import logger from searx import logger
logger = logger.getChild('webapp') logger = logger.getChild('webapp')
try:
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
except:
logger.critical("cannot import dependency: pygments")
from sys import exit
exit(1)
try:
from cgi import escape
except:
from html import escape
from six import next
from datetime import datetime, timedelta from datetime import datetime, timedelta
from time import time from time import time
from html import escape
from io import StringIO
from urllib.parse import urlencode, urlparse, urljoin
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
from werkzeug.middleware.proxy_fix import ProxyFix from werkzeug.middleware.proxy_fix import ProxyFix
from flask import ( from flask import (
Flask, request, render_template, url_for, Response, make_response, Flask, request, render_template, url_for, Response, make_response,
@ -78,7 +76,6 @@ from searx.plugins import plugins
from searx.plugins.oa_doi_rewrite import get_doi_resolver from searx.plugins.oa_doi_rewrite import get_doi_resolver
from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
from searx.answerers import answerers from searx.answerers import answerers
from searx.url_utils import urlencode, urlparse, urljoin
from searx.utils import new_hmac from searx.utils import new_hmac
# check if the pyopenssl package is installed. # check if the pyopenssl package is installed.
@ -89,19 +86,6 @@ except ImportError:
logger.critical("The pyopenssl package has to be installed.\n" logger.critical("The pyopenssl package has to be installed.\n"
"Some HTTPS connections will fail") "Some HTTPS connections will fail")
try:
from cStringIO import StringIO
except:
from io import StringIO
if sys.version_info[0] == 3:
unicode = str
PY3 = True
else:
logger.warning('\033[1;31m Python2 is no longer supported\033[0m')
exit(1)
# serve pages with HTTP/1.1 # serve pages with HTTP/1.1
from werkzeug.serving import WSGIRequestHandler from werkzeug.serving import WSGIRequestHandler
WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0')) WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
@ -315,11 +299,11 @@ def proxify(url):
if not settings.get('result_proxy'): if not settings.get('result_proxy'):
return url return url
url_params = dict(mortyurl=url.encode('utf-8')) url_params = dict(mortyurl=url.encode())
if settings['result_proxy'].get('key'): if settings['result_proxy'].get('key'):
url_params['mortyhash'] = hmac.new(settings['result_proxy']['key'], url_params['mortyhash'] = hmac.new(settings['result_proxy']['key'],
url.encode('utf-8'), url.encode(),
hashlib.sha256).hexdigest() hashlib.sha256).hexdigest()
return '{0}?{1}'.format(settings['result_proxy']['url'], return '{0}?{1}'.format(settings['result_proxy']['url'],
@ -347,10 +331,10 @@ def image_proxify(url):
if settings.get('result_proxy'): if settings.get('result_proxy'):
return proxify(url) return proxify(url)
h = new_hmac(settings['server']['secret_key'], url.encode('utf-8')) h = new_hmac(settings['server']['secret_key'], url.encode())
return '{0}?{1}'.format(url_for('image_proxy'), return '{0}?{1}'.format(url_for('image_proxy'),
urlencode(dict(url=url.encode('utf-8'), h=h))) urlencode(dict(url=url.encode(), h=h)))
def render(template_name, override_theme=None, **kwargs): def render(template_name, override_theme=None, **kwargs):
@ -424,7 +408,7 @@ def render(template_name, override_theme=None, **kwargs):
kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab') kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')
kwargs['unicode'] = unicode kwargs['unicode'] = str
kwargs['preferences'] = request.preferences kwargs['preferences'] = request.preferences
@ -612,7 +596,7 @@ def index():
if 'content' in result and result['content']: if 'content' in result and result['content']:
result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query) result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
if 'title' in result and result['title']: if 'title' in result and result['title']:
result['title'] = highlight_content(escape(result['title'] or u''), search_query.query) result['title'] = highlight_content(escape(result['title'] or ''), search_query.query)
else: else:
if result.get('content'): if result.get('content'):
result['content'] = html_to_text(result['content']).strip() result['content'] = html_to_text(result['content']).strip()
@ -634,14 +618,14 @@ def index():
minutes = int((timedifference.seconds / 60) % 60) minutes = int((timedifference.seconds / 60) % 60)
hours = int(timedifference.seconds / 60 / 60) hours = int(timedifference.seconds / 60 / 60)
if hours == 0: if hours == 0:
result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes) result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes)
else: else:
result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) # noqa result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) # noqa
else: else:
result['publishedDate'] = format_date(result['publishedDate']) result['publishedDate'] = format_date(result['publishedDate'])
if output_format == 'json': if output_format == 'json':
return Response(json.dumps({'query': search_query.query.decode('utf-8'), return Response(json.dumps({'query': search_query.query.decode(),
'number_of_results': number_of_results, 'number_of_results': number_of_results,
'results': results, 'results': results,
'answers': list(result_container.answers), 'answers': list(result_container.answers),
@ -670,7 +654,7 @@ def index():
csv.writerow([row.get(key, '') for key in keys]) csv.writerow([row.get(key, '') for key in keys])
csv.stream.seek(0) csv.stream.seek(0)
response = Response(csv.stream.read(), mimetype='application/csv') response = Response(csv.stream.read(), mimetype='application/csv')
cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode('utf-8')) cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode())
response.headers.add('Content-Disposition', cont_disp) response.headers.add('Content-Disposition', cont_disp)
return response return response
@ -754,10 +738,7 @@ def autocompleter():
disabled_engines = request.preferences.engines.get_disabled() disabled_engines = request.preferences.engines.get_disabled()
# parse query # parse query
if PY3: raw_text_query = RawTextQuery(request.form.get('q', b''), disabled_engines)
raw_text_query = RawTextQuery(request.form.get('q', b''), disabled_engines)
else:
raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines)
raw_text_query.parse_query() raw_text_query.parse_query()
# check if search query is set # check if search query is set
@ -879,7 +860,7 @@ def _is_selected_language_supported(engine, preferences):
@app.route('/image_proxy', methods=['GET']) @app.route('/image_proxy', methods=['GET'])
def image_proxy(): def image_proxy():
url = request.args.get('url').encode('utf-8') url = request.args.get('url').encode()
if not url: if not url:
return '', 400 return '', 400
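Finally, a sketch of the proxy-URL signing touched in the webapp hunks: the URL is encoded to UTF-8 bytes and signed with HMAC-SHA256 before being appended as a parameter (the key, URL and route below are placeholders, not searx's configuration):

import hashlib
import hmac
from urllib.parse import urlencode

secret_key = b'not-a-real-secret'
url = 'https://example.org/image.png'

h = hmac.new(secret_key, url.encode(), hashlib.sha256).hexdigest()
print('/image_proxy?' + urlencode(dict(url=url.encode(), h=h)))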
