[fix] pep8 part II.
commit 5740cfbf1c
parent b0fd71b7b3
@@ -28,7 +28,8 @@ except:
 searx_dir = abspath(dirname(__file__))
 engine_dir = dirname(realpath(__file__))

-# if possible set path to settings using the enviroment variable SEARX_SETTINGS_PATH
+# if possible set path to settings using the
+# enviroment variable SEARX_SETTINGS_PATH
 if 'SEARX_SETTINGS_PATH' in environ:
     settings_path = environ['SEARX_SETTINGS_PATH']
 # otherwise using default path
@@ -41,7 +41,7 @@ def load_module(filename):
     module.name = modname
     return module


-if not 'engines' in settings or not settings['engines']:
+if 'engines' not in settings or not settings['engines']:
     print '[E] Error no engines found. Edit your settings.yml'
     exit(2)
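The `not 'engines' in settings` rewrite is pycodestyle rule E713 (test for membership should use `not in`). The two spellings are equivalent at runtime; a minimal sketch with a stand-in settings dict:

```python
# Stand-in settings dict, not searx's real configuration.
settings = {'engines': []}

# "not x in y" parses as "not (x in y)", so behaviour is identical;
# PEP8 simply prefers the dedicated "not in" operator for readability.
assert (not 'engines' in settings) == ('engines' not in settings)

if 'engines' not in settings or not settings['engines']:
    print('[E] Error no engines found. Edit your settings.yml')
```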
@@ -68,15 +68,15 @@ for engine_data in settings['engines']:
         engine.categories = ['general']

     if not hasattr(engine, 'language_support'):
-        #engine.language_support = False
+        # engine.language_support = False
         engine.language_support = True

     if not hasattr(engine, 'timeout'):
-        #engine.language_support = False
+        # engine.language_support = False
         engine.timeout = settings['server']['request_timeout']

     if not hasattr(engine, 'shortcut'):
-        #engine.shortcut = '''
+        # engine.shortcut = '''
         engine.shortcut = ''

     # checking required variables
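These `#comment` → `# comment` changes are rule E265 (block comment should start with `# `). The surrounding loop back-fills optional attributes on each engine module; an illustrative standalone sketch of that hasattr-based defaulting pattern (the dummy class and timeout value are placeholders, not searx internals):

```python
class DummyEngine(object):
    """Stands in for a dynamically loaded engine module."""
    pass

engine = DummyEngine()
request_timeout = 2.0  # placeholder for settings['server']['request_timeout']

# each attribute is only filled in when the engine module did not define it
if not hasattr(engine, 'language_support'):
    engine.language_support = True
if not hasattr(engine, 'timeout'):
    engine.timeout = request_timeout
if not hasattr(engine, 'shortcut'):
    engine.shortcut = ''

assert engine.timeout == 2.0
```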
@@ -161,7 +161,8 @@ def get_engines_stats():

     for engine in scores_per_result:
         if max_score_per_result:
-            engine['percentage'] = int(engine['avg'] / max_score_per_result * 100)
+            engine['percentage'] = int(engine['avg']
+                                       / max_score_per_result * 100)
         else:
             engine['percentage'] = 0
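The wrap fixes E501 (line longer than 79 characters) by continuing inside the already-open `int(...)` parentheses, which PEP8 prefers over backslash continuation. A self-contained check that the wrapped expression computes the same value:

```python
# Made-up numbers; only the wrapping style is the point here.
engine = {'avg': 42.0}
max_score_per_result = 100.0

# implicit line continuation inside the open int( ... ) call
engine['percentage'] = int(engine['avg']
                           / max_score_per_result * 100)
assert engine['percentage'] == 42
```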
@@ -31,30 +31,31 @@ class Query(object):
     def __init__(self, query, blocked_engines):
         self.query = query
         self.blocked_engines = []

         if blocked_engines:
             self.blocked_engines = blocked_engines

         self.query_parts = []
         self.engines = []
         self.languages = []

-    # parse query, if tags are set, which change the serch engine or search-language
+    # parse query, if tags are set, which
+    # change the serch engine or search-language
     def parse_query(self):
         self.query_parts = []

         # split query, including whitespaces
         raw_query_parts = re.split(r'(\s+)', self.query)

         parse_next = True

         for query_part in raw_query_parts:
             if not parse_next:
                 self.query_parts[-1] += query_part
                 continue

             parse_next = False

             # part does only contain spaces, skip
             if query_part.isspace()\
                     or query_part == '':
@@ -62,15 +63,17 @@ class Query(object):
                 self.query_parts.append(query_part)
                 continue

-            # this force a language
+            # this force a language
             if query_part[0] == ':':
                 lang = query_part[1:].lower()

-                # check if any language-code is equal with declared language-codes
+                # check if any language-code is equal with
+                # declared language-codes
                 for lc in language_codes:
                     lang_id, lang_name, country = map(str.lower, lc)

-                    # if correct language-code is found, set it as new search-language
+                    # if correct language-code is found
+                    # set it as new search-language
                     if lang == lang_id\
                        or lang_id.startswith(lang)\
                        or lang == lang_name\
@@ -89,23 +92,24 @@ class Query(object):
                 parse_next = True
                 self.engines.append({'category': 'none',
                                      'name': engine_shortcuts[prefix]})

             # check if prefix is equal with engine name
             elif prefix in engines\
-                    and not prefix in self.blocked_engines:
+                    and prefix not in self.blocked_engines:
                 parse_next = True
                 self.engines.append({'category': 'none',
                                      'name': prefix})

             # check if prefix is equal with categorie name
             elif prefix in categories:
-                # using all engines for that search, which are declared under that categorie name
+                # using all engines for that search, which
+                # are declared under that categorie name
                 parse_next = True
                 self.engines.extend({'category': prefix,
                                      'name': engine.name}
                                     for engine in categories[prefix]
-                                    if not engine in self.blocked_engines)
+                                    if engine not in self.blocked_engines)

         # append query part to query_part list
         self.query_parts.append(query_part)

@@ -114,14 +118,13 @@ class Query(object):
             self.query_parts[-1] = search_query
         else:
             self.query_parts.append(search_query)

-
     def getSearchQuery(self):
         if len(self.query_parts):
             return self.query_parts[-1]
         else:
             return ''

     def getFullQuery(self):
         # get full querry including whitespaces
         return string.join(self.query_parts, '')
searx/search.py
@@ -22,7 +22,7 @@ from datetime import datetime
 from operator import itemgetter
 from urlparse import urlparse, unquote
 from searx.engines import (
-    categories, engines, engine_shortcuts
+    categories, engines
 )
 from searx.languages import language_codes
 from searx.utils import gen_useragent
@@ -39,7 +39,13 @@ def default_request_params():


 # create a callback wrapper for the search engine results
-def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params):
+def make_callback(engine_name,
+                  results,
+                  suggestions,
+                  answers,
+                  infoboxes,
+                  callback,
+                  params):

     # creating a callback wrapper for the search engine results
     def process_callback(response, **kwargs):
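Breaking `make_callback`'s signature one parameter per line, aligned under the opening parenthesis, is the usual E501 fix for long `def` lines. A trimmed sketch of the style (the body here is a placeholder, not searx's actual callback logic):

```python
def make_callback(engine_name,
                  results,
                  callback,
                  params):
    # the real wrapper parses the engine response and stores results;
    # this placeholder only forwards its arguments
    def process_callback(response, **kwargs):
        callback(engine_name, results, response, params)
    return process_callback
```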
@@ -95,7 +101,7 @@ def make_callback(engine_name, results, suggestions, answers, infoboxes, callbac
 def content_result_len(content):
     if isinstance(content, basestring):
         content = re.sub('[,;:!?\./\\\\ ()-_]', '', content)
-        return len(content)
+        return len(content)
     else:
         return 0

@@ -126,7 +132,8 @@ def score_results(results):

         # strip multiple spaces and cariage returns from content
         if 'content' in res:
-            res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
+            res['content'] = re.sub(' +', ' ',
+                                    res['content'].strip().replace('\n', ''))

         # get weight of this engine if possible
         if hasattr(engines[res['engine']], 'weight'):
@@ -139,8 +146,12 @@ def score_results(results):
         duplicated = False
         for new_res in results:
             # remove / from the end of the url if required
-            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
-            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
+            p1 = res['parsed_url'].path[:-1]\
+                if res['parsed_url'].path.endswith('/')\
+                else res['parsed_url'].path
+            p2 = new_res['parsed_url'].path[:-1]\
+                if new_res['parsed_url'].path.endswith('/')\
+                else new_res['parsed_url'].path

             # check if that result is a duplicate
             if res['host'] == new_res['host'] and\
@@ -153,7 +164,8 @@ def score_results(results):
         # merge duplicates together
         if duplicated:
             # using content with more text
-            if content_result_len(res.get('content', '')) > content_result_len(duplicated.get('content', '')):
+            if content_result_len(res.get('content', '')) >\
+                    content_result_len(duplicated.get('content', '')):
                 duplicated['content'] = res['content']

             # increase result-score
@@ -182,17 +194,25 @@ def score_results(results):

     for i, res in enumerate(results):
         # FIXME : handle more than one category per engine
-        category = engines[res['engine']].categories[0] + ':' + '' if 'template' not in res else res['template']
+        category = engines[res['engine']].categories[0] + ':' + ''\
+            if 'template' not in res\
+            else res['template']

-        current = None if category not in categoryPositions else categoryPositions[category]
+        current = None if category not in categoryPositions\
+            else categoryPositions[category]

-        # group with previous results using the same category if the group can accept more result and is not too far from the current position
-        if current != None and (current['count'] > 0) and (len(gresults) - current['index'] < 20):
-            # group with the previous results using the same category with this one
+        # group with previous results using the same category
+        # if the group can accept more result and is not too far
+        # from the current position
+        if current is not None and (current['count'] > 0)\
+                and (len(gresults) - current['index'] < 20):
+            # group with the previous results using
+            # the same category with this one
             index = current['index']
             gresults.insert(index, res)

-            # update every index after the current one (including the current one)
+            # update every index after the current one
+            # (including the current one)
             for k in categoryPositions:
                 v = categoryPositions[k]['index']
                 if v >= index:
@@ -206,7 +226,7 @@ def score_results(results):
             gresults.append(res)

             # update categoryIndex
-            categoryPositions[category] = { 'index' : len(gresults), 'count' : 8 }
+            categoryPositions[category] = {'index': len(gresults), 'count': 8}

     # return gresults
     return gresults
@@ -215,21 +235,21 @@ def score_results(results):

 def merge_two_infoboxes(infobox1, infobox2):
     if 'urls' in infobox2:
         urls1 = infobox1.get('urls', None)
-        if urls1 == None:
+        if urls1 is None:
             urls1 = []
             infobox1.set('urls', urls1)

         urlSet = set()
         for url in infobox1.get('urls', []):
             urlSet.add(url.get('url', None))

         for url in infobox2.get('urls', []):
             if url.get('url', None) not in urlSet:
                 urls1.append(url)

     if 'attributes' in infobox2:
         attributes1 = infobox1.get('attributes', None)
-        if attributes1 == None:
+        if attributes1 is None:
             attributes1 = []
             infobox1.set('attributes', attributes1)
@@ -237,14 +257,14 @@ def merge_two_infoboxes(infobox1, infobox2):
         for attribute in infobox1.get('attributes', []):
             if attribute.get('label', None) not in attributeSet:
                 attributeSet.add(attribute.get('label', None))

         for attribute in infobox2.get('attributes', []):
             attributes1.append(attribute)

     if 'content' in infobox2:
         content1 = infobox1.get('content', None)
         content2 = infobox2.get('content', '')
-        if content1 != None:
+        if content1 is not None:
             if content_result_len(content2) > content_result_len(content1):
                 infobox1['content'] = content2
         else:
@@ -257,12 +277,12 @@ def merge_infoboxes(infoboxes):
     for infobox in infoboxes:
         add_infobox = True
         infobox_id = infobox.get('id', None)
-        if infobox_id != None:
+        if infobox_id is not None:
             existingIndex = infoboxes_id.get(infobox_id, None)
-            if existingIndex != None:
+            if existingIndex is not None:
                 merge_two_infoboxes(results[existingIndex], infobox)
-                add_infobox=False
+                add_infobox = False

         if add_infobox:
             results.append(infobox)
             infoboxes_id[infobox_id] = len(results)-1
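The `== None` / `!= None` rewrites are rule E711: `None` is a singleton, so PEP8 mandates the identity operators `is` / `is not`. Beyond style, an identity check cannot be fooled by a custom `__eq__`:

```python
class Weird(object):
    # a pathological __eq__ that claims equality with everything
    def __eq__(self, other):
        return True

w = Weird()
assert w == None          # True only because __eq__ lies (E711 flags this)
assert w is not None      # identity: w is a real object, not None
```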
@@ -318,7 +338,8 @@ class Search(object):

         self.pageno = int(pageno_param)

-        # parse query, if tags are set, which change the serch engine or search-language
+        # parse query, if tags are set, which change
+        # the serch engine or search-language
         query_obj = Query(self.request_data['q'], self.blocked_engines)
         query_obj.parse_query()

@@ -334,25 +355,29 @@ class Search(object):

         self.categories = []

-        # if engines are calculated from query, set categories by using that informations
+        # if engines are calculated from query,
+        # set categories by using that informations
         if self.engines:
             self.categories = list(set(engine['category']
                                        for engine in self.engines))

-        # otherwise, using defined categories to calculate which engines should be used
+        # otherwise, using defined categories to
+        # calculate which engines should be used
         else:
             # set used categories
             for pd_name, pd in self.request_data.items():
                 if pd_name.startswith('category_'):
                     category = pd_name[9:]
                     # if category is not found in list, skip
-                    if not category in categories:
+                    if category not in categories:
                         continue

                     # add category to list
                     self.categories.append(category)

-            # if no category is specified for this search, using user-defined default-configuration which (is stored in cookie)
+            # if no category is specified for this search,
+            # using user-defined default-configuration which
+            # (is stored in cookie)
             if not self.categories:
                 cookie_categories = request.cookies.get('categories', '')
                 cookie_categories = cookie_categories.split(',')
@@ -360,16 +385,18 @@ class Search(object):
                 if ccateg in categories:
                     self.categories.append(ccateg)

-        # if still no category is specified, using general as default-category
+        # if still no category is specified, using general
+        # as default-category
         if not self.categories:
             self.categories = ['general']

-        # using all engines for that search, which are declared under the specific categories
+        # using all engines for that search, which are
+        # declared under the specific categories
         for categ in self.categories:
             self.engines.extend({'category': categ,
                                  'name': x.name}
                                 for x in categories[categ]
-                                if not x.name in self.blocked_engines)
+                                if x.name not in self.blocked_engines)

     # do search-request
     def search(self, request):
@@ -386,7 +413,7 @@ class Search(object):
         number_of_searches += 1

         # set default useragent
-        #user_agent = request.headers.get('User-Agent', '')
+        # user_agent = request.headers.get('User-Agent', '')
         user_agent = gen_useragent()

         # start search-reqest for all selected engines
@@ -400,7 +427,8 @@ class Search(object):
             if self.pageno > 1 and not engine.paging:
                 continue

-            # if search-language is set and engine does not provide language-support, skip
+            # if search-language is set and engine does not
+            # provide language-support, skip
             if self.lang != 'all' and not engine.language_support:
                 continue

@@ -412,7 +440,8 @@ class Search(object):
             request_params['pageno'] = self.pageno
             request_params['language'] = self.lang

-            # update request parameters dependent on search-engine (contained in engines folder)
+            # update request parameters dependent on
+            # search-engine (contained in engines folder)
             request_params = engine.request(self.query.encode('utf-8'),
                                             request_params)

@@ -431,7 +460,8 @@ class Search(object):
                 request_params
             )

-            # create dictionary which contain all informations about the request
+            # create dictionary which contain all
+            # informations about the request
            request_args = dict(
                headers=request_params['headers'],
                hooks=dict(response=callback),
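`hooks=dict(response=callback)` uses the response hook of the underlying `requests`-style API: the wrapper fires as soon as an engine's HTTP response arrives, which is how searx collects results from many engines concurrently. A minimal standalone sketch of the mechanism (URL and handler are illustrative):

```python
import requests

def process_callback(response, **kwargs):
    # runs when the response arrives, before get() returns
    print(response.status_code, response.url)

request_args = dict(
    headers={'User-Agent': 'searx'},
    hooks=dict(response=process_callback),
)
requests.get('https://example.com', **request_args)
```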
@@ -1,4 +1,4 @@
-#import htmlentitydefs
+# import htmlentitydefs
 from codecs import getincrementalencoder
 from HTMLParser import HTMLParser
 from random import choice
@@ -22,7 +22,8 @@ def gen_useragent():

 def searx_useragent():
     return 'searx'

+
 def highlight_content(content, query):

     if not content:
@@ -67,8 +68,8 @@ class HTMLTextExtractor(HTMLParser):
         self.result.append(unichr(codepoint))

     def handle_entityref(self, name):
-        #codepoint = htmlentitydefs.name2codepoint[name]
-        #self.result.append(unichr(codepoint))
+        # codepoint = htmlentitydefs.name2codepoint[name]
+        # self.result.append(unichr(codepoint))
         self.result.append(name)

     def get_text(self):
@@ -71,7 +71,7 @@ app.secret_key = settings['server']['secret_key']

 babel = Babel(app)

-#TODO configurable via settings.yml
+# TODO configurable via settings.yml
 favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud',
             'twitter', 'stackoverflow', 'github']

@@ -146,14 +146,14 @@ def render(template_name, override_theme=None, **kwargs):

     nonblocked_categories = set(chain.from_iterable(nonblocked_categories))

-    if not 'categories' in kwargs:
+    if 'categories' not in kwargs:
         kwargs['categories'] = ['general']
         kwargs['categories'].extend(x for x in
                                     sorted(categories.keys())
                                     if x != 'general'
                                     and x in nonblocked_categories)

-    if not 'selected_categories' in kwargs:
+    if 'selected_categories' not in kwargs:
         kwargs['selected_categories'] = []
         for arg in request.args:
             if arg.startswith('category_'):
@@ -168,7 +168,7 @@ def render(template_name, override_theme=None, **kwargs):
     if not kwargs['selected_categories']:
         kwargs['selected_categories'] = ['general']

-    if not 'autocomplete' in kwargs:
+    if 'autocomplete' not in kwargs:
         kwargs['autocomplete'] = autocomplete

     kwargs['method'] = request.cookies.get('method', 'POST')
@@ -202,14 +202,15 @@ def index():
         'index.html',
     )

-    search.results, search.suggestions, search.answers, search.infoboxes = search.search(request)
+    search.results, search.suggestions,\
+        search.answers, search.infoboxes = search.search(request)

     for result in search.results:

         if not search.paging and engines[result['engine']].paging:
             search.paging = True

-        # check if HTTPS rewrite is required
+        # check if HTTPS rewrite is required
         if settings['server']['https_rewrite']\
                 and result['parsed_url'].scheme == 'http':

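Worth noting: the backslash only continues the physical line, so the wrapped statement is still a single 4-tuple unpacking assignment. Schematically:

```python
def fake_search():
    # stands in for search.search(request), which returns four lists
    return [], [], [], []

results, suggestions,\
    answers, infoboxes = fake_search()
assert (results, suggestions, answers, infoboxes) == ([], [], [], [])
```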
@@ -236,7 +237,7 @@ def index():
                 try:
                     # TODO, precompile rule
                     p = re.compile(rule[0])

                     # rewrite url if possible
                     new_result_url = p.sub(rule[1], result['url'])
                 except:
@@ -250,17 +251,21 @@ def index():
                     continue

                 # get domainname from result
-                # TODO, does only work correct with TLD's like asdf.com, not for asdf.com.de
+                # TODO, does only work correct with TLD's like
+                # asdf.com, not for asdf.com.de
                 # TODO, using publicsuffix instead of this rewrite rule
-                old_result_domainname = '.'.join(result['parsed_url'].hostname.split('.')[-2:])
-                new_result_domainname = '.'.join(new_parsed_url.hostname.split('.')[-2:])
+                old_result_domainname = '.'.join(
+                    result['parsed_url'].hostname.split('.')[-2:])
+                new_result_domainname = '.'.join(
+                    new_parsed_url.hostname.split('.')[-2:])

-                # check if rewritten hostname is the same, to protect against wrong or malicious rewrite rules
+                # check if rewritten hostname is the same,
+                # to protect against wrong or malicious rewrite rules
                 if old_result_domainname == new_result_domainname:
                     # set new url
                     result['url'] = new_result_url

-                # target has matched, do not search over the other rules
+                # target has matched, do not search over the other rules
                 break

             if search.request_data.get('format', 'html') == 'html':
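The TODO is on point: keeping the last two labels of a hostname breaks for multi-label suffixes (the `asdf.com.de` case above, or `co.uk`). A hedged sketch contrasting the naive rule with a publicsuffix-based library (`tldextract` here is purely illustrative; searx does not depend on it):

```python
import tldextract  # third-party: pip install tldextract

hostname = 'www.asdf.co.uk'

# naive rule from the code above: keep the last two labels
naive = '.'.join(hostname.split('.')[-2:])
print(naive)  # 'co.uk' -- wrong: that's a public suffix, not a site

# publicsuffix-aware alternative
ext = tldextract.extract(hostname)
print(ext.registered_domain)  # 'asdf.co.uk'
```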
@@ -429,7 +434,7 @@ def preferences():
     for pd_name, pd in request.form.items():
         if pd_name.startswith('category_'):
             category = pd_name[9:]
-            if not category in categories:
+            if category not in categories:
                 continue
             selected_categories.append(category)
         elif pd_name == 'locale' and pd in settings['locales']: