1
0
mirror of https://github.com/searxng/searxng.git synced 2024-11-19 19:00:10 +01:00

[FIX] google videos thumbnails

This commit is contained in:
Venca24 2019-01-04 15:48:22 +01:00
parent cee15f0375
commit cf26aba93b

View File

@ -7,15 +7,16 @@
@using-api no @using-api no
@results HTML @results HTML
@stable no @stable no
@parse url, title, content @parse url, title, content, thumbnail
""" """
from datetime import date, timedelta from datetime import date, timedelta
from json import loads from json import loads
from lxml import html from lxml import html
from searx.engines import logger
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode from searx.url_utils import urlencode
import re
# engine dependent config # engine dependent config
categories = ['videos'] categories = ['videos']
@ -73,11 +74,24 @@ def response(resp):
url = result.xpath('.//div[@class="r"]/a/@href')[0] url = result.xpath('.//div[@class="r"]/a/@href')[0]
content = extract_text(result.xpath('.//span[@class="st"]')) content = extract_text(result.xpath('.//span[@class="st"]'))
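# get thumbnails: they are inlined as base64 JPEG data in the "_setImagesSrc" script, looked up by the result's <img> id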
script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
id = result.xpath('.//div[@class="s"]//img/@id')[0]
thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
script)
logger.debug('google video engine: ' + id + ' matched ' + str(len(thumbnails_data)) + ' times (thumbnail)')
tmp = []
if len(thumbnails_data) != 0:
tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
thumbnail = ''
if len(tmp) != 0:
thumbnail = tmp[-1]
# append result # append result
results.append({'url': url, results.append({'url': url,
'title': title, 'title': title,
'content': content, 'content': content,
'thumbnail': '', 'thumbnail': thumbnail,
'template': 'videos.html'}) 'template': 'videos.html'})
return results return results