From c645915171d5210d63ef0cf578d2389c67f03663 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Sat, 10 Feb 2018 19:44:07 +0100 Subject: [PATCH 1/2] fix bing videos engine --- searx/engines/bing_videos.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/searx/engines/bing_videos.py b/searx/engines/bing_videos.py index bd91bce37..312a82ba1 100644 --- a/searx/engines/bing_videos.py +++ b/searx/engines/bing_videos.py @@ -69,22 +69,11 @@ def response(resp): dom = html.fromstring(resp.text) for result in dom.xpath('//div[@class="dg_u"]'): - - # try to extract the url - url_container = result.xpath('.//div[@class="sa_wrapper"]/@data-eventpayload') - if len(url_container) > 0: - url = loads(url_container[0])['purl'] - else: - url = result.xpath('./a/@href')[0] - - # discard results that do not return an external url - # very recent results sometimes don't return the video's url - if url.startswith('/videos/search?'): - continue - - title = extract_text(result.xpath('./a//div[@class="tl"]')) - content = extract_text(result.xpath('.//div[@class="pubInfo"]')) - thumbnail = result.xpath('.//div[@class="vthumb"]/img/@src')[0] + url = result.xpath('./div[@class="mc_vtvc"]/a/@href')[0] + url = 'https://bing.com' + url + title = extract_text(result.xpath('./div/a/div/div[@class="mc_vtvc_title"]/@title')) + content = extract_text(result.xpath('./div/a/div/div/div/div/text()')) + thumbnail = result.xpath('./div/a/div/div/img/@src')[0] results.append({'url': url, 'title': title, @@ -92,7 +81,6 @@ def response(resp): 'thumbnail': thumbnail, 'template': 'videos.html'}) - # first page ignores requested number of results if len(results) >= number_of_results: break From 3ef8533f4d402457808e9d9fb52af982abb8112f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Sat, 10 Feb 2018 20:03:55 +0100 Subject: [PATCH 2/2] fix unit tests --- tests/unit/engines/test_bing_videos.py | 103 +++++++++---------------- 1 file changed, 35 insertions(+), 68 deletions(-) diff --git a/tests/unit/engines/test_bing_videos.py b/tests/unit/engines/test_bing_videos.py index 118754b25..8b303d637 100644 --- a/tests/unit/engines/test_bing_videos.py +++ b/tests/unit/engines/test_bing_videos.py @@ -47,87 +47,54 @@ class TestBingVideosEngine(SearxTestCase): self.assertEqual(bing_videos.response(response), []) html = """ -
-
- -
-
+
+
+ +
+
-
""" response = mock.Mock(text=html) results = bing_videos.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) self.assertEqual(results[0]['title'], 'Title 1') - self.assertEqual(results[0]['url'], 'https://url.com/1') - self.assertEqual(results[0]['content'], 'Content 1') - self.assertEqual(results[0]['thumbnail'], 'thumb_1.jpg') - - html = """ - - """ - response = mock.Mock(text=html) - results = bing_videos.response(response) - self.assertEqual(type(results), list) - self.assertEqual(len(results), 1) - self.assertEqual(results[0]['title'], 'Title 1') - self.assertEqual(results[0]['url'], 'https://url.com/1') + self.assertEqual(results[0]['url'], 'https://bing.com/video') self.assertEqual(results[0]['content'], 'Content 1') self.assertEqual(results[0]['thumbnail'], 'thumb_1.jpg')