1
0
mirror of https://github.com/searxng/searxng.git synced 2024-09-12 16:30:33 +02:00

Merge branch '500px_rewrite' of github.com:asciimoo/searx

This commit is contained in:
Adam Tauber 2016-12-11 03:31:33 +01:00
commit e12ea9a510
2 changed files with 32 additions and 73 deletions

View File

@ -12,12 +12,9 @@
@todo rewrite to api @todo rewrite to api
""" """
from json import loads
from urllib import urlencode from urllib import urlencode
from urlparse import urljoin from urlparse import urljoin
from lxml import html
import re
from searx.engines.xpath import extract_text
# engine dependent config # engine dependent config
categories = ['images'] categories = ['images']
@ -25,13 +22,27 @@ paging = True
# search-url # search-url
base_url = 'https://500px.com' base_url = 'https://500px.com'
search_url = base_url + '/search?search?page={pageno}&type=photos&{query}' search_url = 'https://api.500px.com/v1/photos/search?type=photos'\
'&{query}'\
'&image_size%5B%5D=4'\
'&image_size%5B%5D=20'\
'&image_size%5B%5D=21'\
'&image_size%5B%5D=1080'\
'&image_size%5B%5D=1600'\
'&image_size%5B%5D=2048'\
'&include_states=true'\
'&formats=jpeg%2Clytro'\
'&include_tags=true'\
'&exclude_nude=true'\
'&page={pageno}'\
'&rpp=50'\
'&sdk_key=b68e60cff4c929bedea36ca978830c5caca790c3'
# do search-request # do search-request
def request(query, params): def request(query, params):
params['url'] = search_url.format(pageno=params['pageno'], params['url'] = search_url.format(pageno=params['pageno'],
query=urlencode({'q': query})) query=urlencode({'term': query}))
return params return params
@ -40,19 +51,16 @@ def request(query, params):
def response(resp): def response(resp):
results = [] results = []
dom = html.fromstring(resp.text) response_json = loads(resp.text)
regex = re.compile(r'3\.jpg.*$')
# parse results # parse results
for result in dom.xpath('//div[@class="photo"]'): for result in response_json['photos']:
link = result.xpath('.//a')[0] url = urljoin(base_url, result['url'])
url = urljoin(base_url, link.attrib.get('href')) title = result['name']
title = extract_text(result.xpath('.//div[@class="title"]')) # last index is the biggest resolution
thumbnail_src = link.xpath('.//img')[0].attrib.get('src') img_src = result['image_url'][-1]
# To have a bigger thumbnail, uncomment the next line thumbnail_src = result['image_url'][0]
# thumbnail_src = regex.sub('4.jpg', thumbnail_src) content = result['description'] or ''
content = extract_text(result.xpath('.//div[@class="info"]'))
img_src = regex.sub('2048.jpg', thumbnail_src)
# append result # append result
results.append({'url': url, results.append({'url': url,

File diff suppressed because one or more lines are too long