From 884eeb8541e0a4cf3d65c2a17e1c2f788cab7fb1 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Mon, 1 Jun 2015 00:00:32 +0200 Subject: [PATCH] New Qwant engines - Web - Images - News - Social media --- searx/engines/qwant.py | 66 +++++++++++ searx/engines/qwant_images.py | 70 +++++++++++ searx/engines/qwant_news.py | 69 +++++++++++ searx/engines/qwant_social.py | 69 +++++++++++ searx/settings.yml | 16 +++ searx/tests/engines/test_qwant.py | 137 +++++++++++++++++++++ searx/tests/engines/test_qwant_images.py | 145 +++++++++++++++++++++++ searx/tests/engines/test_qwant_news.py | 137 +++++++++++++++++++++ searx/tests/engines/test_qwant_social.py | 140 ++++++++++++++++++++++ searx/tests/test_engines.py | 4 + 10 files changed, 853 insertions(+) create mode 100644 searx/engines/qwant.py create mode 100644 searx/engines/qwant_images.py create mode 100644 searx/engines/qwant_news.py create mode 100644 searx/engines/qwant_social.py create mode 100644 searx/tests/engines/test_qwant.py create mode 100644 searx/tests/engines/test_qwant_images.py create mode 100644 searx/tests/engines/test_qwant_news.py create mode 100644 searx/tests/engines/test_qwant_social.py diff --git a/searx/engines/qwant.py b/searx/engines/qwant.py new file mode 100644 index 000000000..91c12a19e --- /dev/null +++ b/searx/engines/qwant.py @@ -0,0 +1,66 @@ +""" + Qwant (Web) + + @website https://qwant.com/ + @provide-api not officially (https://api.qwant.com/api/search/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" + +from urllib import urlencode +from json import loads + +# engine dependent config +categories = ['general'] +paging = True +language_support = True + +# search-url +url = 'https://api.qwant.com/api/search/web?count=10&offset={offset}&f=&{query}' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 10 + + params['url'] = url.format(query=urlencode({'q': query}), + offset=offset) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&locale=' + params['language'].lower() + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + data = search_results.get('data', {}) + + res = data.get('result', {}) + + # parse results + for result in res.get('items', {}): + + title = result['title'] + res_url = result['url'] + content = result['desc'] + + # append result + results.append({'title': title, + 'content': content, + 'url': res_url}) + + # return results + return results diff --git a/searx/engines/qwant_images.py b/searx/engines/qwant_images.py new file mode 100644 index 000000000..1c1753389 --- /dev/null +++ b/searx/engines/qwant_images.py @@ -0,0 +1,70 @@ +""" + Qwant (Images) + + @website https://qwant.com/ + @provide-api not officially (https://api.qwant.com/api/search/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" + +from urllib import urlencode +from json import loads + +# engine dependent config +categories = ['images'] +paging = True +language_support = True + +# search-url +url = 'https://api.qwant.com/api/search/images?count=10&offset={offset}&f=&{query}' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 10 + + params['url'] = url.format(query=urlencode({'q': query}), + offset=offset) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&locale=' + params['language'].lower() + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + data = search_results.get('data', {}) + + res = data.get('result', {}) + + # parse results + for result in res.get('items', {}): + + title = result['title'] + res_url = result['url'] + thumbnail_src = result['thumbnail'] + img_src = result['media'] + + # append result + results.append({'template': 'images.html', + 'url': res_url, + 'title': title, + 'content': '', + 'thumbnail_src': thumbnail_src, + 'img_src': img_src}) + + # return results + return results diff --git a/searx/engines/qwant_news.py b/searx/engines/qwant_news.py new file mode 100644 index 000000000..c4d5be5d3 --- /dev/null +++ b/searx/engines/qwant_news.py @@ -0,0 +1,69 @@ +""" + Qwant (News) + + @website https://qwant.com/ + @provide-api not officially (https://api.qwant.com/api/search/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" + +from urllib import urlencode +from json import loads +from datetime import datetime + +# engine dependent config +categories = ['news'] +paging = True +language_support = True + +# search-url +url = 'https://api.qwant.com/api/search/news?count=10&offset={offset}&f=&{query}' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 10 + + params['url'] = url.format(query=urlencode({'q': query}), + offset=offset) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&locale=' + params['language'].lower() + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + data = search_results.get('data', {}) + + res = data.get('result', {}) + + # parse results + for result in res.get('items', {}): + + title = result['title'] + res_url = result['url'] + content = result['desc'] + published_date = datetime.fromtimestamp(result['date'], None) + + # append result + results.append({'url': res_url, + 'title': title, + 'publishedDate': published_date, + 'content': content}) + + # return results + return results diff --git a/searx/engines/qwant_social.py b/searx/engines/qwant_social.py new file mode 100644 index 000000000..474dfac02 --- /dev/null +++ b/searx/engines/qwant_social.py @@ -0,0 +1,69 @@ +""" + Qwant (social media) + + @website https://qwant.com/ + @provide-api not officially (https://api.qwant.com/api/search/) + + @using-api yes + @results JSON + @stable yes + @parse url, title, content +""" + +from urllib import urlencode +from json import loads +from datetime import datetime + +# engine dependent config +categories = ['social media'] +paging = True +language_support = True + +# search-url +url = 'https://api.qwant.com/api/search/social?count=10&offset={offset}&f=&{query}' + + +# do search-request +def request(query, params): + offset = (params['pageno'] - 1) * 10 + + params['url'] = url.format(query=urlencode({'q': query}), + offset=offset) + + # add language tag if specified + if params['language'] != 'all': + params['url'] += '&locale=' + params['language'].lower() + + return params + + +# get response from search-request +def response(resp): + results = [] + + search_results = loads(resp.text) + + # return empty array if there are no results + if 'data' not in search_results: + return [] + + data = search_results.get('data', {}) + + res = data.get('result', {}) + + # parse results + for result in res.get('items', {}): + + title = result['title'] + res_url = result['url'] + content = result['desc'] + published_date = datetime.fromtimestamp(result['date'], None) + + # append result + results.append({'url': res_url, + 'title': title, + 'content': content, + 'publishedDate': published_date}) + + # return results + return results diff --git a/searx/settings.yml b/searx/settings.yml index 519ea8be1..7f8229732 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -168,6 +168,22 @@ engines: engine : piratebay shortcut : tpb + - name : qwant + engine : qwant + shortcut : qw + + - name : qwant images + engine : qwant_images + shortcut : qwi + + - name : qwant news + engine : qwant_news + shortcut : qwn + + - name : qwant social + engine : qwant_social + shortcut : qws + - name : kickass engine : kickass shortcut : ka diff --git a/searx/tests/engines/test_qwant.py b/searx/tests/engines/test_qwant.py new file mode 100644 index 000000000..9aa1c7c56 --- /dev/null +++ b/searx/tests/engines/test_qwant.py @@ -0,0 +1,137 @@ +from collections import defaultdict +import mock +from searx.engines import qwant +from searx.testing import SearxTestCase + + +class TestQwantEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + dicto['language'] = 'fr_FR' + params = qwant.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('qwant.com', params['url']) + self.assertIn('fr_fr', params['url']) + + dicto['language'] = 'all' + params = qwant.request(query, dicto) + self.assertFalse('fr' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, qwant.response, None) + self.assertRaises(AttributeError, qwant.response, []) + self.assertRaises(AttributeError, qwant.response, '') + self.assertRaises(AttributeError, qwant.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(qwant.response(response), []) + + response = mock.Mock(text='{"data": {}}') + self.assertEqual(qwant.response(response), []) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "title": "Title", + "score": 9999, + "url": "http://www.url.xyz", + "source": "...", + "desc": "Description", + "date": "", + "_id": "db0aadd62c2a8565567ffc382f5c61fa", + "favicon": "https://s.qwant.com/fav.ico" + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url.xyz') + self.assertEqual(results[0]['content'], 'Description') + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success" + } + """ + response = mock.Mock(text=json) + results = qwant.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_qwant_images.py b/searx/tests/engines/test_qwant_images.py new file mode 100644 index 000000000..bf89f1b01 --- /dev/null +++ b/searx/tests/engines/test_qwant_images.py @@ -0,0 +1,145 @@ +from collections import defaultdict +import mock +from searx.engines import qwant_images +from searx.testing import SearxTestCase + + +class TestQwantImagesEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + dicto['language'] = 'fr_FR' + params = qwant_images.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('qwant.com', params['url']) + self.assertIn('fr_fr', params['url']) + + dicto['language'] = 'all' + params = qwant_images.request(query, dicto) + self.assertFalse('fr' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, qwant_images.response, None) + self.assertRaises(AttributeError, qwant_images.response, []) + self.assertRaises(AttributeError, qwant_images.response, '') + self.assertRaises(AttributeError, qwant_images.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(qwant_images.response(response), []) + + response = mock.Mock(text='{"data": {}}') + self.assertEqual(qwant_images.response(response), []) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "title": "Title", + "type": "image", + "media": "http://www.url.xyz/fullimage.jpg", + "desc": "", + "thumbnail": "http://www.url.xyz/thumbnail.jpg", + "thumb_width": 365, + "thumb_height": 230, + "width": "365", + "height": "230", + "size": "187.7KB", + "url": "http://www.url.xyz", + "_id": "0ffd93fb26f3e192a6020af8fc16fbb1", + "media_fullsize": "http://www.proxy/fullimage.jpg", + "count": 0 + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_images.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url.xyz') + self.assertEqual(results[0]['content'], '') + self.assertEqual(results[0]['thumbnail_src'], 'http://www.url.xyz/thumbnail.jpg') + self.assertEqual(results[0]['img_src'], 'http://www.url.xyz/fullimage.jpg') + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_images.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_images.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success" + } + """ + response = mock.Mock(text=json) + results = qwant_images.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_qwant_news.py b/searx/tests/engines/test_qwant_news.py new file mode 100644 index 000000000..17cdd3cc1 --- /dev/null +++ b/searx/tests/engines/test_qwant_news.py @@ -0,0 +1,137 @@ +from collections import defaultdict +import mock +from searx.engines import qwant_news +from searx.testing import SearxTestCase + + +class TestQwantNewsEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + dicto['language'] = 'fr_FR' + params = qwant_news.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('qwant.com', params['url']) + self.assertIn('fr_fr', params['url']) + + dicto['language'] = 'all' + params = qwant_news.request(query, dicto) + self.assertFalse('fr' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, qwant_news.response, None) + self.assertRaises(AttributeError, qwant_news.response, []) + self.assertRaises(AttributeError, qwant_news.response, '') + self.assertRaises(AttributeError, qwant_news.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(qwant_news.response(response), []) + + response = mock.Mock(text='{"data": {}}') + self.assertEqual(qwant_news.response(response), []) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "title": "Title", + "score": 9999, + "url": "http://www.url.xyz", + "source": "...", + "desc": "Description", + "date": 1433065411, + "_id": "db0aadd62c2a8565567ffc382f5c61fa", + "favicon": "https://s.qwant.com/fav.ico" + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url.xyz') + self.assertEqual(results[0]['content'], 'Description') + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success" + } + """ + response = mock.Mock(text=json) + results = qwant_news.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/engines/test_qwant_social.py b/searx/tests/engines/test_qwant_social.py new file mode 100644 index 000000000..6e87e9898 --- /dev/null +++ b/searx/tests/engines/test_qwant_social.py @@ -0,0 +1,140 @@ +from collections import defaultdict +import mock +from searx.engines import qwant_social +from searx.testing import SearxTestCase + + +class TestQwantSocialEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + dicto['language'] = 'fr_FR' + params = qwant_social.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('qwant.com', params['url']) + self.assertIn('fr_fr', params['url']) + + dicto['language'] = 'all' + params = qwant_social.request(query, dicto) + self.assertFalse('fr' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, qwant_social.response, None) + self.assertRaises(AttributeError, qwant_social.response, []) + self.assertRaises(AttributeError, qwant_social.response, '') + self.assertRaises(AttributeError, qwant_social.response, '[]') + + response = mock.Mock(text='{}') + self.assertEqual(qwant_social.response(response), []) + + response = mock.Mock(text='{"data": {}}') + self.assertEqual(qwant_social.response(response), []) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "items": [ + { + "_id": "dc0b3f24c93684c7d7f1b0a4c2d9f1b0", + "__index": 32, + "title": "Title", + "img": "img", + "desc": "Description", + "date": 1432643480, + "type": "twitter", + "card": "XXX", + "post": "603176590856556545", + "url": "http://www.url.xyz", + "userUrl": "https://twitter.com/XXX" + } + ], + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_social.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'Title') + self.assertEqual(results[0]['url'], 'http://www.url.xyz') + self.assertEqual(results[0]['content'], 'Description') + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "result": { + "filters": [] + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_social.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success", + "data": { + "query": { + "locale": "en_us", + "query": "Test", + "offset": 10 + }, + "cache": { + "key": "e66aa864c00147a0e3a16ff7a5efafde", + "created": 1433092754, + "expiration": 259200, + "status": "miss", + "age": 0 + } + } + } + """ + response = mock.Mock(text=json) + results = qwant_social.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) + + json = """ + { + "status": "success" + } + """ + response = mock.Mock(text=json) + results = qwant_social.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index d0a4de4b8..4f3088adb 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -25,6 +25,10 @@ from searx.tests.engines.test_mixcloud import * # noqa from searx.tests.engines.test_openstreetmap import * # noqa from searx.tests.engines.test_photon import * # noqa from searx.tests.engines.test_piratebay import * # noqa +from searx.tests.engines.test_qwant import * # noqa +from searx.tests.engines.test_qwant_images import * # noqa +from searx.tests.engines.test_qwant_news import * # noqa +from searx.tests.engines.test_qwant_social import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa