
[fix] pep/flake8 compatibility

asciimoo 2014-01-20 02:31:20 +01:00
parent 692c0bf5f0
commit b2492c94f4
23 changed files with 197 additions and 109 deletions
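
The changes below are mechanical flake8/PEP 8 cleanups: comparisons to None rewritten with "is", compound statements split onto separate lines, over-long calls wrapped with one argument or key per line, lines that cannot be shortened (URLs, regexes, templates) tagged "# noqa", and repeated XPath/template literals hoisted into module-level constants. A minimal, hypothetical sketch of the recurring patterns (illustrative only, not code from this repository):

# Hypothetical before/after sketch of the flake8 fixes applied in this commit.
value = None
if value is None:        # E711: was "if value == None:"
    value = 'default'
if not value:            # E701: was "if not value: value = 'default'"
    value = 'default'
result = {'url': 'https://example.com/',   # E501: over-long calls are wrapped
          'title': 'example',              # with one key per line instead of
          'content': ''}                   # a single line over 79 characters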

View File

@@ -66,7 +66,7 @@ for engine_data in settings['engines']:
for engine_attr in dir(engine):
if engine_attr.startswith('_'):
continue
-if getattr(engine, engine_attr) == None:
+if getattr(engine, engine_attr) is None:
print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr)  # noqa
sys.exit(1)
engines[engine.name] = engine

View File

@@ -5,7 +5,7 @@ categories = []
url = 'http://finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
weight = 100
-parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I)
+parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I)  # noqa
def request(query, params):
@@ -46,9 +46,11 @@ def response(resp):
resp.search_params['ammount'] * conversion_rate
)
-content = '1 {0} is {1} {2}'.format(resp.search_params['from'], conversion_rate, resp.search_params['to'])
+content = '1 {0} is {1} {2}'.format(resp.search_params['from'],
+conversion_rate,
+resp.search_params['to'])
now_date = datetime.now().strftime('%Y%m%d')
-url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html'
+url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html'  # noqa
url = url.format(
now_date,
resp.search_params['ammount'],

View File

@@ -6,7 +6,10 @@ categories = ['videos']
locale = 'en_US'
# see http://www.dailymotion.com/doc/api/obj-video.html
-search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}'
+search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}'  # noqa
+# TODO use video result template
+content_tpl = '<a href="{0}" title="{0}" ><img src="{1}" /></a><br />'
def request(query, params):
@@ -25,7 +28,7 @@ def response(resp):
title = res['title']
url = res['url']
if res['thumbnail_360_url']:
-content = '<a href="{0}" title="{0}" ><img src="{1}" /></a><br />'.format(url, res['thumbnail_360_url'])
+content = content_tpl.format(url, res['thumbnail_360_url'])
else:
content = ''
if res['description']:

View File

@@ -7,6 +7,7 @@ categories = ['images']
base_url = 'https://www.deviantart.com/'
search_url = base_url+'search?'
def request(query, params):
global search_url
params['url'] = search_url + urlencode({'q': query})
@@ -22,8 +23,11 @@ def response(resp):
for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'):
link = result.xpath('.//a[contains(@class, "thumb")]')[0]
url = urljoin(base_url, link.attrib.get('href'))
-title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')
+title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')  # noqa
title = ''.join(title_links[0].xpath('.//text()'))
img_src = link.xpath('.//img')[0].attrib['src']
-results.append({'url': url, 'title': title, 'img_src': img_src, 'template': 'images.html'})
+results.append({'url': url,
+'title': title,
+'img_src': img_src,
+'template': 'images.html'})
return results

View File

@@ -6,8 +6,11 @@ url = 'https://duckduckgo.com/'
search_url = url + 'd.js?{query}&p=1&s=0'
locale = 'us-en'
def request(query, params):
-params['url'] = search_url.format(query=urlencode({'q': query, 'l': locale}))
+q = urlencode({'q': query,
+'l': locale})
+params['url'] = search_url.format(query=q)
return params
@@ -17,8 +20,7 @@ def response(resp):
for r in search_res:
if not r.get('t'):
continue
-results.append({'title': r['t']
-,'content': html_to_text(r['a'])
-,'url': r['u']
-})
+results.append({'title': r['t'],
+'content': html_to_text(r['a']),
+'url': r['u']})
return results

View File

@@ -3,6 +3,7 @@ from urllib import urlencode
url = 'http://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1'
def request(query, params):
params['url'] = url.format(query=urlencode({'q': query}))
return params
@@ -13,11 +14,10 @@ def response(resp):
results = []
if 'Definition' in search_res:
if search_res.get('AbstractURL'):
-res = {'title' : search_res.get('Heading', '')
-,'content' : search_res.get('Definition', '')
-,'url' : search_res.get('AbstractURL', '')
-,'class' : 'definition_result'
-}
+res = {'title': search_res.get('Heading', ''),
+'content': search_res.get('Definition', ''),
+'url': search_res.get('AbstractURL', ''),
+'class': 'definition_result'}
results.append(res)
return results

View File

@@ -2,7 +2,8 @@ from urllib import urlencode
from HTMLParser import HTMLParser
url = 'http://www.filecrop.com/'
-search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1'
+search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1'  # noqa
class FilecropResultParser(HTMLParser):
def __init__(self):
@@ -18,22 +19,28 @@ class FilecropResultParser(HTMLParser):
def handle_starttag(self, tag, attrs):
if tag == 'tr':
-if ('bgcolor', '#edeff5') in attrs or ('bgcolor', '#ffffff') in attrs:
+if ('bgcolor', '#edeff5') in attrs or\
+('bgcolor', '#ffffff') in attrs:
self.__start_processing = True
if not self.__start_processing:
return
if tag == 'label':
-self.result['title'] = [attr[1] for attr in attrs if attr[0] == 'title'][0]
+self.result['title'] = [attr[1] for attr in attrs
+if attr[0] == 'title'][0]
-elif tag == 'a' and ('rel', 'nofollow') in attrs and ('class', 'sourcelink') in attrs:
+elif tag == 'a' and ('rel', 'nofollow') in attrs\
+and ('class', 'sourcelink') in attrs:
if 'content' in self.result:
-self.result['content'] += [attr[1] for attr in attrs if attr[0] == 'title'][0]
+self.result['content'] += [attr[1] for attr in attrs
+if attr[0] == 'title'][0]
else:
-self.result['content'] = [attr[1] for attr in attrs if attr[0] == 'title'][0]
+self.result['content'] = [attr[1] for attr in attrs
+if attr[0] == 'title'][0]
self.result['content'] += ' '
elif tag == 'a':
-self.result['url'] = url + [attr[1] for attr in attrs if attr[0] == 'href'][0]
+self.result['url'] = url + [attr[1] for attr in attrs
+if attr[0] == 'href'][0]
def handle_endtag(self, tag):
if self.__start_processing is False:
@@ -60,10 +67,12 @@ class FilecropResultParser(HTMLParser):
self.data_counter += 1
def request(query, params):
params['url'] = search_url.format(query=urlencode({'w': query}))
return params
def response(resp):
parser = FilecropResultParser()
parser.feed(resp.text)

View File

@@ -8,21 +8,27 @@ categories = ['images']
url = 'https://secure.flickr.com/'
search_url = url+'search/?{query}'
+results_xpath = '//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]'  # noqa
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}))
return params
def response(resp):
global base_url
results = []
dom = html.fromstring(resp.text)
-for result in dom.xpath('//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]'):
+for result in dom.xpath(results_xpath):
href = urljoin(url, result.attrib.get('href'))
img = result.xpath('.//img')[0]
title = img.attrib.get('alt', '')
img_src = img.attrib.get('data-defer-src')
if not img_src:
continue
-results.append({'url': href, 'title': title, 'img_src': img_src, 'template': 'images.html'})
+results.append({'url': href,
+'title': title,
+'img_src': img_src,
+'template': 'images.html'})
return results

View File

@@ -4,12 +4,15 @@ from cgi import escape
categories = ['it']
-search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}'
+search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}'  # noqa
+accept_header = 'application/vnd.github.preview.text-match+json'
def request(query, params):
global search_url
params['url'] = search_url.format(query=urlencode({'q': query}))
-params['headers']['Accept'] = 'application/vnd.github.preview.text-match+json'
+params['headers']['Accept'] = accept_header
return params

View File

@@ -6,12 +6,14 @@ from json import loads
categories = ['images']
url = 'https://ajax.googleapis.com/'
-search_url = url + 'ajax/services/search/images?v=1.0&start=0&rsz=large&safe=off&filter=off&{query}'
+search_url = url + 'ajax/services/search/images?v=1.0&start=0&rsz=large&safe=off&filter=off&{query}'  # noqa
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}))
return params
def response(resp):
results = []
search_res = loads(resp.text)
@@ -24,5 +26,9 @@ def response(resp):
title = result['title']
if not result['url']:
continue
-results.append({'url': href, 'title': title, 'content': '', 'img_src': result['url'], 'template': 'images.html'})
+results.append({'url': href,
+'title': title,
+'content': '',
+'img_src': result['url'],
+'template': 'images.html'})
return results

View File

@@ -8,6 +8,7 @@ content_query = None
title_query = None
#suggestion_xpath = ''
def iterate(iterable):
if type(iterable) == dict:
it = iterable.iteritems()
@@ -17,11 +18,15 @@ def iterate(iterable):
for index, value in it:
yield str(index), value
def is_iterable(obj):
-if type(obj) == str: return False
-if type(obj) == unicode: return False
+if type(obj) == str:
+return False
+if type(obj) == unicode:
+return False
return isinstance(obj, Iterable)
def parse(query):
q = []
for part in query.split('/'):
@@ -31,6 +36,7 @@ def parse(query):
q.append(part)
return q
def do_query(data, q):
ret = []
if not len(q):
@@ -54,11 +60,13 @@ def do_query(data, q):
ret.extend(do_query(value, q))
return ret
def query(data, query_string):
q = parse(query_string)
return do_query(data, q)
def request(query, params):
query = urlencode({'q': query})[2:]
params['url'] = search_url.format(query=query)

View File

@@ -3,10 +3,12 @@ from urllib import urlencode, quote
url = 'https://en.wikipedia.org/'
+search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json'  # noqa
number_of_results = 10
def request(query, params):
-search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json'
params['url'] = search_url.format(query=urlencode({'srsearch': query}))
return params
@@ -14,7 +16,5 @@ def request(query, params):
def response(resp):
search_results = loads(resp.text)
res = search_results.get('query', {}).get('search', [])
-return [{'url': url + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')),
+return [{'url': url + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')),  # noqa
'title': result['title']} for result in res[:int(number_of_results)]]

View File

@@ -7,13 +7,18 @@ categories = ['videos', 'music']
url = 'https://thepiratebay.se/'
search_url = url + 'search/{search_term}/0/99/{search_type}'
-search_types = {'videos': '200'
-,'music' : '100'
-,'files' : '0'
-}
+search_types = {'videos': '200',
+'music': '100',
+'files': '0'}
+magnet_xpath = './/a[@title="Download this torrent using magnet"]'
+content_xpath = './/font[@class="detDesc"]//text()'
def request(query, params):
-params['url'] = search_url.format(search_term=quote(query), search_type=search_types.get(params['category']))
+search_type = search_types.get(params['category'])
+params['url'] = search_url.format(search_term=quote(query),
+search_type=search_type)
return params
@@ -27,10 +32,14 @@ def response(resp):
link = result.xpath('.//div[@class="detName"]//a')[0]
href = urljoin(url, link.attrib.get('href'))
title = ' '.join(link.xpath('.//text()'))
-content = escape(' '.join(result.xpath('.//font[@class="detDesc"]//text()')))
+content = escape(' '.join(result.xpath(content_xpath)))
seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
-magnetlink = result.xpath('.//a[@title="Download this torrent using magnet"]')[0]
+magnetlink = result.xpath(magnet_xpath)[0]
-results.append({'url': href, 'title': title, 'content': content,
-'seed': seed, 'leech': leech, 'magnetlink': magnetlink.attrib['href'],
-'template': 'torrent.html'})
+results.append({'url': href,
+'title': title,
+'content': content,
+'seed': seed,
+'leech': leech,
+'magnetlink': magnetlink.attrib['href'],
+'template': 'torrent.html'})
return results

View File

@@ -5,7 +5,8 @@ categories = ['music']
guest_client_id = 'b45b1aa10f1ac2941910a7f0d10f8e28'
url = 'https://api.soundcloud.com/'
-search_url = url + 'search?{query}&facet=model&limit=20&offset=0&linked_partitioning=1&client_id='+guest_client_id
+search_url = url + 'search?{query}&facet=model&limit=20&offset=0&linked_partitioning=1&client_id='+guest_client_id  # noqa
def request(query, params):
global search_url
@@ -21,5 +22,7 @@ def response(resp):
if result['kind'] in ('track', 'playlist'):
title = result['title']
content = result['description']
-results.append({'url': result['permalink_url'], 'title': title, 'content': content})
+results.append({'url': result['permalink_url'],
+'title': title,
+'content': content})
return results

View File

@@ -7,6 +7,8 @@ categories = ['it']
url = 'http://stackoverflow.com/'
search_url = url+'search?'
+result_xpath = './/div[@class="excerpt"]//text()'
def request(query, params):
params['url'] = search_url + urlencode({'q': query})
@@ -20,6 +22,6 @@ def response(resp):
link = result.xpath('.//div[@class="result-link"]//a')[0]
href = urljoin(url, link.attrib.get('href'))
title = escape(' '.join(link.xpath('.//text()')))
-content = escape(' '.join(result.xpath('.//div[@class="excerpt"]//text()')))
+content = escape(' '.join(result.xpath(result_xpath)))
results.append({'url': href, 'title': title, 'content': content})
return results

View File

@@ -1,11 +1,10 @@
from urllib import urlencode
from lxml import html
-from urlparse import urlparse
-from cgi import escape
base_url = 'https://startpage.com/'
search_url = base_url+'do/search'
def request(query, params):
global search_url
query = urlencode({'q': query})[2:]
@@ -24,7 +23,6 @@ def response(resp):
for result in dom.xpath('//div[@id="results"]/div[@class="result"]'):
link = result.xpath('.//h3/a')[0]
url = link.attrib.get('href')
-parsed_url = urlparse(url)
title = link.text_content()
content = result.xpath('./p[@class="desc"]')[0].text_content()
results.append({'url': url, 'title': title, 'content': content})

View File

@@ -7,6 +7,9 @@ categories = ['social media']
base_url = 'https://twitter.com/'
search_url = base_url+'search?'
+title_xpath = './/span[@class="username js-action-profile-name"]//text()'
+content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
def request(query, params):
global search_url
@@ -21,7 +24,9 @@ def response(resp):
for tweet in dom.xpath('//li[@data-item-type="tweet"]'):
link = tweet.xpath('.//small[@class="time"]//a')[0]
url = urljoin(base_url, link.attrib.get('href'))
-title = ''.join(tweet.xpath('.//span[@class="username js-action-profile-name"]//text()'))
+title = ''.join(tweet.xpath(title_xpath))
-content = escape(''.join(tweet.xpath('.//p[@class="js-tweet-text tweet-text"]//text()')))
+content = escape(''.join(tweet.xpath(content_xpath)))
-results.append({'url': url, 'title': title, 'content': content})
+results.append({'url': url,
+'title': title,
+'content': content})
return results

View File

@@ -9,23 +9,27 @@ url_xpath = None
content_xpath = None
title_xpath = None
results_xpath = ''
+content_tpl = '<a href="{0}"> <img src="{2}"/> </a>'
-# the cookie set by vimeo contains all the following values, but only __utma seems to be requiered
+# the cookie set by vimeo contains all the following values,
+# but only __utma seems to be requiered
cookie = {
#'vuid':'918282893.1027205400'
# 'ab_bs':'%7B%223%22%3A279%7D'
'__utma': '00000000.000#0000000.0000000000.0000000000.0000000000.0'
# '__utmb':'18302654.1.10.1388942090'
#, '__utmc':'18302654'
-#, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'
+#, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'  # noqa
#, '__utml':'search'
}
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}))
params['cookies'] = cookie
return params
def response(resp):
results = []
dom = html.fromstring(resp.text)
@@ -36,10 +40,9 @@ def response(resp):
url = base_url + result.xpath(url_xpath)[0]
title = p.unescape(extract_text(result.xpath(title_xpath)))
thumbnail = extract_text(result.xpath(content_xpath)[0])
-content = '<a href="{0}"> <img src="{2}"/> </a>'.format(url, title, thumbnail)
-results.append({'url': url
-, 'title': title
-, 'content': content
-, 'template':'videos.html'
-, 'thumbnail': thumbnail})
+results.append({'url': url,
+'title': title,
+'content': content_tpl.format(url, title, thumbnail),
+'template': 'videos.html',
+'thumbnail': thumbnail})
return results

View File

@@ -1,7 +1,6 @@
from lxml import html
from urllib import urlencode, unquote
from urlparse import urlparse, urljoin
-from cgi import escape
from lxml.etree import _ElementStringResult
search_url = None
@@ -11,11 +10,15 @@ title_xpath = None
suggestion_xpath = ''
results_xpath = ''
'''
if xpath_results is list, extract the text from each result and concat the list
-if xpath_results is a xml element, extract all the text node from it ( text_content() method from lxml )
+if xpath_results is a xml element, extract all the text node from it
+( text_content() method from lxml )
if xpath_results is a string element, then it's already done
'''
def extract_text(xpath_results):
if type(xpath_results) == list:
# it's list of result : concat everything using recursive call
@@ -60,7 +63,8 @@ def normalize_url(url):
url += '/'
# FIXME : hack for yahoo
-if parsed_url.hostname == 'search.yahoo.com' and parsed_url.path.startswith('/r'):
+if parsed_url.hostname == 'search.yahoo.com'\
+and parsed_url.path.startswith('/r'):
p = parsed_url.path
mark = p.find('/**')
if mark != -1:
@@ -87,9 +91,9 @@ def response(resp):
results.append({'url': url, 'title': title, 'content': content})
else:
for url, title, content in zip(
-map(extract_url, dom.xpath(url_xpath)), \
-map(extract_text, dom.xpath(title_xpath)), \
-map(extract_text, dom.xpath(content_xpath)), \
+map(extract_url, dom.xpath(url_xpath)),
+map(extract_text, dom.xpath(title_xpath)),
+map(extract_text, dom.xpath(content_xpath))
):
results.append({'url': url, 'title': title, 'content': content})

View File

@@ -4,10 +4,12 @@ from urllib import urlencode
url = 'http://localhost:8090'
search_url = '/yacysearch.json?{query}&maximumRecords=10'
def request(query, params):
params['url'] = url + search_url.format(query=urlencode({'query': query}))
return params
def response(resp):
raw_search_results = loads(resp.text)

View File

@@ -5,6 +5,7 @@ categories = ['videos']
search_url = 'https://gdata.youtube.com/feeds/api/videos?alt=json&{query}'
def request(query, params):
params['url'] = search_url.format(query=urlencode({'q': query}))
return params
@@ -30,17 +31,16 @@ def response(resp):
thumbnail = ''
if len(result['media$group']['media$thumbnail']):
thumbnail = result['media$group']['media$thumbnail'][0]['url']
-content += '<a href="{0}" title="{0}" ><img src="{1}" /></a>'.format(url, thumbnail)
+content += '<a href="{0}" title="{0}" ><img src="{1}" /></a>'.format(url, thumbnail)  # noqa
if len(content):
content += '<br />' + result['content']['$t']
else:
content = result['content']['$t']
-results.append({'url': url
-, 'title': title
-, 'content': content
-, 'template':'videos.html'
-, 'thumbnail':thumbnail})
+results.append({'url': url,
+'title': title,
+'content': content,
+'template': 'videos.html',
+'thumbnail': thumbnail})
return results

View File

@@ -1,14 +1,15 @@
from HTMLParser import HTMLParser
#import htmlentitydefs
import csv
-import codecs
+from codecs import getincrementalencoder
import cStringIO
import re
def gen_useragent():
# TODO
-return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
+ua = "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
+return ua
def highlight_content(content, query):
@@ -46,7 +47,10 @@ class HTMLTextExtractor(HTMLParser):
self.result.append(d)
def handle_charref(self, number):
-codepoint = int(number[1:], 16) if number[0] in (u'x', u'X') else int(number)
+if number[0] in (u'x', u'X'):
+codepoint = int(number[1:], 16)
+else:
+codepoint = int(number)
self.result.append(unichr(codepoint))
def handle_entityref(self, name):
@@ -75,10 +79,16 @@ class UnicodeWriter:
self.queue = cStringIO.StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
-self.encoder = codecs.getincrementalencoder(encoding)()
+self.encoder = getincrementalencoder(encoding)()
def writerow(self, row):
-self.writer.writerow([(s.encode("utf-8").strip() if type(s) == str or type(s) == unicode else str(s)) for s in row])
+unicode_row = []
+for col in row:
+if type(col) == str or type(col) == unicode:
+unicode_row.append(col.encode('utf-8').strip())
+else:
+unicode_row.append(col)
+self.writer.writerow(unicode_row)
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")

View File

@@ -18,7 +18,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
'''
from searx import settings
-from flask import Flask, request, render_template, url_for, Response, make_response, redirect
+from flask import Flask, request, render_template
+from flask import url_for, Response, make_response, redirect
from searx.engines import search, categories, engines, get_engines_stats
import json
import cStringIO
@@ -70,7 +71,8 @@ def get_base_url():
def render(template_name, **kwargs):
global categories
kwargs['categories'] = ['general']
-kwargs['categories'].extend(x for x in sorted(categories.keys()) if x != 'general')
+kwargs['categories'].extend(x for x in
+sorted(categories.keys()) if x != 'general')
if not 'selected_categories' in kwargs:
kwargs['selected_categories'] = []
cookie_categories = request.cookies.get('categories', '').split(',')
@@ -114,7 +116,8 @@ def index():
continue
selected_categories.append(category)
if not len(selected_categories):
-cookie_categories = request.cookies.get('categories', '').split(',')
+cookie_categories = request.cookies.get('categories', '')
+cookie_categories = cookie_categories.split(',')
for ccateg in cookie_categories:
if ccateg in categories:
selected_categories.append(ccateg)
@@ -122,7 +125,9 @@ def index():
selected_categories = ['general']
for categ in selected_categories:
-selected_engines.extend({'category': categ, 'name': x.name} for x in categories[categ])
+selected_engines.extend({'category': categ,
+'name': x.name}
+for x in categories[categ])
results, suggestions = search(query, request, selected_engines)
@@ -137,7 +142,8 @@ def index():
result['content'] = html_to_text(result['content']).strip()
result['title'] = html_to_text(result['title']).strip()
if len(result['url']) > 74:
-result['pretty_url'] = result['url'][:35] + '[..]' + result['url'][-35:]
+url_parts = result['url'][:35], result['url'][-35:]
+result['pretty_url'] = '{0}[...]{1}'.format(*url_parts)
else:
result['pretty_url'] = result['url']
@@ -146,7 +152,8 @@ def index():
result['favicon'] = engine
if request_data.get('format') == 'json':
-return Response(json.dumps({'query': query, 'results': results}), mimetype='application/json')
+return Response(json.dumps({'query': query, 'results': results}),
+mimetype='application/json')
elif request_data.get('format') == 'csv':
csv = UnicodeWriter(cStringIO.StringIO())
keys = ('title', 'url', 'content', 'host', 'engine', 'score')
@@ -157,7 +164,8 @@ def index():
csv.writerow([row.get(key, '') for key in keys])
csv.stream.seek(0)
response = Response(csv.stream.read(), mimetype='application/csv')
-response.headers.add('Content-Disposition', 'attachment;Filename=searx_-_{0}.csv'.format('_'.join(query.split())))
+content_disp = 'attachment;Filename=searx_-_{0}.csv'.format(query)
+response.headers.add('Content-Disposition', content_disp)
return response
elif request_data.get('format') == 'rss':
response_rss = render(
@@ -248,7 +256,8 @@ def opensearch():
@app.route('/favicon.ico')
def favicon():
return send_from_directory(os.path.join(app.root_path, 'static/img'),
-'favicon.png', mimetype='image/vnd.microsoft.icon')
+'favicon.png',
+mimetype='image/vnd.microsoft.icon')
def run():