2019-08-05 16:15:40 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
2015-01-25 20:14:37 +01:00
|
|
|
from collections import defaultdict
|
|
|
|
import mock
|
|
|
|
from searx.engines import bing
|
|
|
|
from searx.testing import SearxTestCase
|
|
|
|
|
|
|
|
|
|
|
|
class TestBingEngine(SearxTestCase):
    """Unit tests for the request/response helpers of ``searx.engines.bing``."""

    @staticmethod
    def _make_response(text, search_params):
        """Return a mock response exposing ``text`` and ``search_params``."""
        response = mock.Mock(text=text)
        response.search_params = search_params
        return response

    def test_request(self):
        """``bing.request`` must emit a bing.com URL with query and language."""
        # Patch the module-level language list for the duration of this test
        # only, so the value does not leak into other tests. ``create=True``
        # keeps this working even if the module does not define the attribute.
        patcher = mock.patch.object(
            bing,
            'supported_languages',
            ['en', 'fr', 'zh-CHS', 'zh-CHT', 'pt-PT', 'pt-BR'],
            create=True)
        patcher.start()
        self.addCleanup(patcher.stop)

        query = u'test_query'
        search_params = defaultdict(dict)
        search_params['pageno'] = 1
        search_params['language'] = 'fr-FR'

        params = bing.request(query.encode('utf-8'), search_params)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        # 'language%3AFR' is the URL-encoded form of 'language:FR'.
        self.assertIn('language%3AFR', params['url'])
        self.assertIn('bing.com', params['url'])

        # Even with language 'all' the URL is expected to carry a language
        # filter.
        search_params['language'] = 'all'
        params = bing.request(query.encode('utf-8'), search_params)
        self.assertIn('language', params['url'])

    def test_response(self):
        """``bing.response`` must parse both result layouts and the count."""
        search_params = defaultdict(dict)
        search_params['pageno'] = 1
        search_params['language'] = 'fr-FR'

        # Inputs that are not response objects are rejected.
        self.assertRaises(AttributeError, bing.response, None)
        self.assertRaises(AttributeError, bing.response, [])
        self.assertRaises(AttributeError, bing.response, '')
        self.assertRaises(AttributeError, bing.response, '[]')

        # An empty document yields no results at all.
        response = self._make_response('<html></html>', search_params)
        self.assertEqual(bing.response(response), [])

        # Layout 1: results wrapped in <div class="sa_cc"> with an <h3> title.
        html = """
        <div>
            <div id="b_tween">
                <span class="sb_count" data-bm="4">23 900 000 résultats</span>
            </div>
            <ol id="b_results" role="main">
                <div class="sa_cc" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
                    <div Class="sa_mc">
                        <div class="sb_tlst">
                            <h3>
                                <a href="http://this.should.be.the.link/" h="ID=SERP,5124.1">
                                <strong>This</strong> should be the title</a>
                            </h3>
                        </div>
                        <div class="sb_meta"><cite><strong>this</strong>.meta.com</cite>
                            <span class="c_tlbxTrg">
                                <span class="c_tlbxH" H="BASE:CACHEDPAGEDEFAULT" K="SERP,5125.1">
                                </span>
                            </span>
                        </div>
                        <p><strong>This</strong> should be the content.</p>
                    </div>
                </div>
            </ol>
        </div>
        """
        response = self._make_response(html, search_params)
        results = bing.response(response)
        self.assertIsInstance(results, list)
        # One parsed hit plus the trailing 'number_of_results' entry.
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['title'], 'This should be the title')
        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
        self.assertEqual(results[0]['content'], 'This should be the content.')
        self.assertEqual(results[-1]['number_of_results'], 23900000)

        # Layout 2: results as <li class="b_algo"> with an <h2> title, and a
        # paginated counter of the form "<from>-<to> ... <total>".
        html = """
        <div>
            <div id="b_tween">
                <span class="sb_count" data-bm="4">9-18 résultats sur 23 900 000</span>
            </div>
            <ol id="b_results" role="main">
                <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
                    <div Class="sa_mc">
                        <div class="sb_tlst">
                            <h2>
                                <a href="http://this.should.be.the.link/" h="ID=SERP,5124.1">
                                <strong>This</strong> should be the title</a>
                            </h2>
                        </div>
                        <div class="sb_meta"><cite><strong>this</strong>.meta.com</cite>
                            <span class="c_tlbxTrg">
                                <span class="c_tlbxH" H="BASE:CACHEDPAGEDEFAULT" K="SERP,5125.1">
                                </span>
                            </span>
                        </div>
                        <p><strong>This</strong> should be the content.</p>
                    </div>
                </li>
            </ol>
        </div>
        """
        search_params['pageno'] = 2
        response = self._make_response(html, search_params)
        results = bing.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['title'], 'This should be the title')
        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
        self.assertEqual(results[0]['content'], 'This should be the content.')
        self.assertEqual(results[-1]['number_of_results'], 23900000)

        # Same layout, but the requested page number (33 900 000) is larger
        # than the advertised result count: an empty list is expected.
        html = """
        <div>
            <div id="b_tween">
                <span class="sb_count" data-bm="4">23 900 000 résultats</span>
            </div>
            <ol id="b_results" role="main">
                <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
                    <div Class="sa_mc">
                        <div class="sb_tlst">
                            <h2>
                                <a href="http://this.should.be.the.link/" h="ID=SERP,5124.1">
                                <strong>This</strong> should be the title</a>
                            </h2>
                        </div>
                        <div class="sb_meta"><cite><strong>this</strong>.meta.com</cite>
                            <span class="c_tlbxTrg">
                                <span class="c_tlbxH" H="BASE:CACHEDPAGEDEFAULT" K="SERP,5125.1">
                                </span>
                            </span>
                        </div>
                        <p><strong>This</strong> should be the content.</p>
                    </div>
                </li>
            </ol>
        </div>
        """
        search_params['pageno'] = 33900000
        response = self._make_response(html, search_params)
        self.assertEqual(bing.response(response), [])

    def test_fetch_supported_languages(self):
        """``bing._fetch_supported_languages`` must list the language inputs."""
        # A page without the language form yields an empty list.
        response = mock.Mock(text="""<html></html>""")
        results = bing._fetch_supported_languages(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 0)

        # Language codes come from the <input> ids under #limit-languages;
        # the assertions expect underscores to be turned into hyphens.
        html = """
        <html>
            <body>
                <form>
                    <div id="limit-languages">
                        <div>
                            <div><input id="es" value="es"></input></div>
                        </div>
                        <div>
                            <div><input id="pt_BR" value="pt_BR"></input></div>
                            <div><input id="pt_PT" value="pt_PT"></input></div>
                        </div>
                    </div>
                </form>
            </body>
        </html>
        """
        response = mock.Mock(text=html)
        languages = bing._fetch_supported_languages(response)
        self.assertIsInstance(languages, list)
        self.assertEqual(len(languages), 3)
        self.assertIn('es', languages)
        self.assertIn('pt-BR', languages)
        self.assertIn('pt-PT', languages)