
[fix] pep8 : engines (errors E121, E127, E128 and E501 still exist)

dalf 2014-12-07 16:37:56 +01:00
parent ffcec383b7
commit 7c13d630e4
22 changed files with 109 additions and 97 deletions
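For reference, the four pep8 codes named in the title: E501 flags lines longer than 79 characters, while E121, E127 and E128 flag continuation lines whose indentation does not line up with the hanging or visual indent of the opening bracket. A hand-written illustration follows; the function and strings are made up, not taken from this commit.

def some_function(first, second):  # made-up helper, illustration only
    return (first, second)

# E501: this comment is deliberately written long enough to run past the 79-character limit
x = some_function('a',
    'b')                # E128: continuation under-indented for visual indent
y = some_function('a',
                  'b')  # aligned under the first argument: no E12x warning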

View File: searx/engines/bing.py

@@ -1,7 +1,8 @@
 ## Bing (Web)
 #
 # @website https://www.bing.com
-# @provide-api yes (http://datamarket.azure.com/dataset/bing/search), max. 5000 query/month
+# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
+#              max. 5000 query/month
 #
 # @using-api no (because of query limit)
 # @results HTML (using search portal)

View File: searx/engines/bing_images.py

@@ -1,17 +1,19 @@
 ## Bing (Images)
 #
 # @website https://www.bing.com/images
-# @provide-api yes (http://datamarket.azure.com/dataset/bing/search), max. 5000 query/month
+# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
+#              max. 5000 query/month
 #
 # @using-api no (because of query limit)
 # @results HTML (using search portal)
 # @stable no (HTML can change)
 # @parse url, title, img_src
 #
-# @todo currently there are up to 35 images receive per page, because bing does not parse count=10. limited response to 10 images
+# @todo currently there are up to 35 images receive per page,
+#       because bing does not parse count=10.
+#       limited response to 10 images

 from urllib import urlencode
-from cgi import escape
 from lxml import html
 from yaml import load
 import re
@@ -51,14 +53,14 @@ def response(resp):
     dom = html.fromstring(resp.content)

     # init regex for yaml-parsing
-    p = re.compile( '({|,)([a-z]+):(")')
+    p = re.compile('({|,)([a-z]+):(")')

     # parse results
     for result in dom.xpath('//div[@class="dg_u"]'):
         link = result.xpath('./a')[0]

         # parse yaml-data (it is required to add a space, to make it parsable)
-        yaml_data = load(p.sub( r'\1\2: \3', link.attrib.get('m')))
+        yaml_data = load(p.sub(r'\1\2: \3', link.attrib.get('m')))

         title = link.attrib.get('t1')
         #url = 'http://' + link.attrib.get('t3')
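The regex in the second hunk rewrites Bing's almost-JSON m attribute into parsable YAML by inserting a space after each lowercase key. A minimal sketch of that transformation; the sample attribute value is made up:

import re
from yaml import safe_load  # the engine calls plain load(); safe_load keeps this sketch self-contained

# Bing stores image metadata in the link's "m" attribute as JSON-like
# text with unquoted keys and no space after the colon (made-up sample):
raw = '{ns:"images.1",k:"5045",imgurl:"http://example.com/a.jpg"}'

p = re.compile('({|,)([a-z]+):(")')
fixed = p.sub(r'\1\2: \3', raw)  # '{ns: "images.1",k: "5045",...}'

data = safe_load(fixed)          # now a valid YAML flow mapping
print(data['imgurl'])            # http://example.com/a.jpg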

View File: searx/engines/bing_news.py

@@ -1,7 +1,8 @@
 ## Bing (News)
 #
 # @website https://www.bing.com/news
-# @provide-api yes (http://datamarket.azure.com/dataset/bing/search), max. 5000 query/month
+# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
+#              max. 5000 query/month
 #
 # @using-api no (because of query limit)
 # @results HTML (using search portal)
@@ -57,12 +58,12 @@ def response(resp):
         url = link.attrib.get('href')
         title = ' '.join(link.xpath('.//text()'))
         contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()')
-        if contentXPath != None:
+        if contentXPath is not None:
             content = escape(' '.join(contentXPath))

         # parse publishedDate
         publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div//span[contains(@class,"sn_ST")]//span[contains(@class,"sn_tm")]//text()')
-        if publishedDateXPath != None:
+        if publishedDateXPath is not None:
             publishedDate = escape(' '.join(publishedDateXPath))

         if re.match("^[0-9]+ minute(s|) ago$", publishedDate):
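A nuance of the != None fixes above: xpath() returns a list and never None, so the old test and the new one are both always true; an empty match comes back as an empty list. A small sketch with made-up markup:

from lxml import html

# made-up stand-in for a Bing News result block
result = html.fromstring('<div><span class="sn_snip">snippet text</span></div>')
contentXPath = result.xpath('.//span[@class="sn_snip"]//text()')

print(contentXPath is not None)  # True even when nothing matches
if contentXPath:                 # truthiness is the stricter test
    content = ' '.join(contentXPath)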

View File: searx/engines/currency_convert.py

@@ -55,6 +55,6 @@ def response(resp):
         resp.search_params['to'].lower()
     )

-    results.append({'answer' : answer, 'url': url})
+    results.append({'answer': answer, 'url': url})

     return results

View File: searx/engines/dailymotion.py

@@ -12,7 +12,6 @@
 from urllib import urlencode
 from json import loads
-from lxml import html

 # engine dependent config
 categories = ['videos']

View File: searx/engines/duckduckgo.py

@@ -1,7 +1,8 @@
 ## DuckDuckGo (Web)
 #
 # @website https://duckduckgo.com/
-# @provide-api yes (https://duckduckgo.com/api), but not all results from search-site
+# @provide-api yes (https://duckduckgo.com/api),
+#              but not all results from search-site
 #
 # @using-api no
 # @results HTML (using search portal)
@@ -9,7 +10,8 @@
 # @parse url, title, content
 #
 # @todo rewrite to api
-# @todo language support (the current used site does not support language-change)
+# @todo language support
+#       (the current used site does not support language-change)

 from urllib import urlencode
 from lxml.html import fromstring
@@ -37,7 +39,7 @@ def request(query, params):
     if params['language'] == 'all':
         locale = 'en-us'
     else:
-        locale = params['language'].replace('_','-').lower()
+        locale = params['language'].replace('_', '-').lower()

     params['url'] = url.format(
         query=urlencode({'q': query, 'kl': locale}),

View File: searx/engines/faroo.py

@@ -27,6 +27,7 @@ search_url = url + 'api?{query}&start={offset}&length={number_of_results}&l={lan
 search_category = {'general': 'web',
                    'news': 'news'}

+
 # do search-request
 def request(query, params):
     offset = (params['pageno']-1) * number_of_results + 1
@@ -48,7 +49,7 @@ def request(query, params):
                                       query=urlencode({'q': query}),
                                       language=language,
                                       categorie=categorie,
-                                      api_key=api_key )
+                                      api_key=api_key)

     # using searx User-Agent
     params['headers']['User-Agent'] = searx_useragent()

View File: searx/engines/google_images.py

@@ -1,7 +1,8 @@
 ## Google (Images)
 #
 # @website https://www.google.com
-# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
+# @provide-api yes (https://developers.google.com/web-search/docs/),
+#              deprecated!
 #
 # @using-api yes
 # @results JSON

View File: searx/engines/google_news.py

@@ -1,7 +1,8 @@
 ## Google (News)
 #
 # @website https://www.google.com
-# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
+# @provide-api yes (https://developers.google.com/web-search/docs/),
+#              deprecated!
 #
 # @using-api yes
 # @results JSON

View File: searx/engines/openstreetmap.py

@@ -39,16 +39,16 @@ def response(resp):
         url = result_base_url.format(osm_type=osm_type,
                                      osm_id=r['osm_id'])

-        osm = {'type':osm_type,
-               'id':r['osm_id']}
+        osm = {'type': osm_type,
+               'id': r['osm_id']}

         geojson = r.get('geojson')

         # if no geojson is found and osm_type is a node, add geojson Point
         if not geojson and\
           osm_type == 'node':
-            geojson = {u'type':u'Point',
-                       u'coordinates':[r['lon'],r['lat']]}
+            geojson = {u'type': u'Point',
+                       u'coordinates': [r['lon'], r['lat']]}

         address_raw = r.get('address')
         address = {}
@@ -59,20 +59,20 @@ def response(resp):
            r['class'] == 'tourism' or\
            r['class'] == 'leisure':
             if address_raw.get('address29'):
-                address = {'name':address_raw.get('address29')}
+                address = {'name': address_raw.get('address29')}
             else:
-                address = {'name':address_raw.get(r['type'])}
+                address = {'name': address_raw.get(r['type'])}

             # add rest of adressdata, if something is already found
             if address.get('name'):
-                address.update({'house_number':address_raw.get('house_number'),
-                                'road':address_raw.get('road'),
-                                'locality':address_raw.get('city',
-                                           address_raw.get('town',
-                                           address_raw.get('village'))),
-                                'postcode':address_raw.get('postcode'),
-                                'country':address_raw.get('country'),
-                                'country_code':address_raw.get('country_code')})
+                address.update({'house_number': address_raw.get('house_number'),
+                                'road': address_raw.get('road'),
+                                'locality': address_raw.get('city',
+                                            address_raw.get('town',
+                                            address_raw.get('village'))),
+                                'postcode': address_raw.get('postcode'),
+                                'country': address_raw.get('country'),
+                                'country_code': address_raw.get('country_code')})
             else:
                 address = None
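The nested address_raw.get() calls above implement a city → town → village fallback for the locality field; a minimal sketch with a made-up Nominatim-style address dict:

# made-up sample: no 'city' key, so 'town' wins and 'village' is ignored
address_raw = {'town': 'Quakenbrueck', 'country': 'Germany'}

locality = address_raw.get('city',
                           address_raw.get('town',
                                           address_raw.get('village')))
print(locality)  # Quakenbrueck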

View File: searx/engines/vimeo.py

@@ -1,7 +1,8 @@
 ## Vimeo (Videos)
 #
 # @website https://vimeo.com/
-# @provide-api yes (http://developer.vimeo.com/api), they have a maximum count of queries/hour
+# @provide-api yes (http://developer.vimeo.com/api),
+#              they have a maximum count of queries/hour
 #
 # @using-api no (TODO, rewrite to api)
 # @results HTML (using search portal)
@@ -35,11 +36,12 @@ publishedDate_xpath = './/p[@class="meta"]//attribute::datetime'
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(pageno=params['pageno'] ,
+    params['url'] = search_url.format(pageno=params['pageno'],
                                       query=urlencode({'q': query}))

     # TODO required?
-    params['cookies']['__utma'] = '00000000.000#0000000.0000000000.0000000000.0000000000.0'
+    params['cookies']['__utma'] =\
+        '00000000.000#0000000.0000000000.0000000000.0000000000.0'

     return params
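The trailing backslash keeps the cookie assignment under the 79-character limit; PEP 8 generally favours implicit continuation inside brackets, so an equivalent sketch (not what the commit does) would be:

params = {'cookies': {}}  # stand-in for the request params dict

# implicit continuation via parentheses instead of a trailing backslash
params['cookies']['__utma'] = (
    '00000000.000#0000000.0000000000.0000000000.0000000000.0')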

View File: searx/engines/yacy.py

@@ -1,7 +1,8 @@
 ## Yacy (Web, Images, Videos, Music, Files)
 #
 # @website http://yacy.net
-# @provide-api yes (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
+# @provide-api yes
+#              (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
 #
 # @using-api yes
 # @results JSON
@@ -16,7 +17,7 @@ from urllib import urlencode
 from dateutil import parser

 # engine dependent config
-categories = ['general', 'images'] #TODO , 'music', 'videos', 'files'
+categories = ['general', 'images']  # TODO , 'music', 'videos', 'files'
 paging = True
 language_support = True
 number_of_results = 5

View File: searx/engines/yahoo.py

@@ -1,7 +1,8 @@
 ## Yahoo (Web)
 #
 # @website https://search.yahoo.com/web
-# @provide-api yes (https://developer.yahoo.com/boss/search/), $0.80/1000 queries
+# @provide-api yes (https://developer.yahoo.com/boss/search/),
+#              $0.80/1000 queries
 #
 # @using-api no (because pricing)
 # @results HTML (using search portal)
@@ -40,7 +41,7 @@ def parse_url(url_string):
         if endpos > -1:
             endpositions.append(endpos)

-    if start==0 or len(endpositions) == 0:
+    if start == 0 or len(endpositions) == 0:
         return url_string
     else:
         end = min(endpositions)