Merge pull request #1658 from dalf/video-fixes

Fix dailymotion, google_videos and youtube_noapi engines
2024-11-22 12:10:11 +01:00 · 2019-08-01 07:44:30 +02:00 · 2019-08-01 07:44:30 +02:00 · 1bed39e6cb
commit 1bed39e6cb
parent 7f56c78876 0c032c8429
3 changed files with 22 additions and 17 deletions
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -15,7 +15,7 @@
 from json import loads
 from datetime import datetime
 from searx.url_utils import urlencode
-from searx.utils import match_language
+from searx.utils import match_language, html_to_text

 # engine dependent config
 categories = ['videos']
@@ -59,7 +59,7 @@ def response(resp):
    for res in search_res['list']:
        title = res['title']
        url = res['url']
-        content = res['description']
+        content = html_to_text(res['description'])
        thumbnail = res['thumbnail_360_url']
        publishedDate = datetime.fromtimestamp(res['created_time'], None)
        embedded = embedded_url.format(videoid=res['id'])
--- a/searx/engines/google_videos.py
+++ b/searx/engines/google_videos.py
@@ -75,15 +75,17 @@ def response(resp):

        # get thumbnails
        script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
-        id = result.xpath('.//div[@class="s"]//img/@id')[0]
-        thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
-                                     script)
-        tmp = []
-        if len(thumbnails_data) != 0:
-            tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
-        thumbnail = ''
-        if len(tmp) != 0:
-            thumbnail = tmp[-1]
+        ids = result.xpath('.//div[@class="s"]//img/@id')
+        if len(ids) > 0:
+            thumbnails_data = \
+                re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + ids[0],
+                           script)
+            tmp = []
+            if len(thumbnails_data) != 0:
+                tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
+            thumbnail = ''
+            if len(tmp) != 0:
+                thumbnail = tmp[-1]

        # append result
        results.append({'url': url,
--- a/searx/engines/youtube_noapi.py
+++ b/searx/engines/youtube_noapi.py
@@ -67,12 +67,8 @@ def response(resp):
            if videoid is not None:
                url = base_youtube_url + videoid
                thumbnail = 'https://i.ytimg.com/vi/' + videoid + '/hqdefault.jpg'
-                title = video.get('title', {}).get('simpleText', videoid)
-                description_snippet = video.get('descriptionSnippet', {})
-                if 'runs' in description_snippet:
-                    content = reduce(lambda a, b: a + b.get('text', ''), description_snippet.get('runs'), '')
-                else:
-                    content = description_snippet.get('simpleText', '')
+                title = get_text_from_json(video.get('title', {}))
+                content = get_text_from_json(video.get('descriptionSnippet', {}))
                embedded = embedded_url.format(videoid=videoid)

                # append result
@@ -85,3 +81,10 @@ def response(resp):

    # return results
    return results
+
+
+def get_text_from_json(element):
+    if 'runs' in element:
+        return reduce(lambda a, b: a + b.get('text', ''), element.get('runs'), '')
+    else:
+        return element.get('simpleText', '')