From 04f7118d0a0693906ef57fa83f01d29eb366a45e Mon Sep 17 00:00:00 2001
From: Thomas Pointhuber
Date: Sun, 8 Feb 2015 14:12:14 +0100
Subject: [PATCH 1/2] [enh] add gigablast engine

---
 searx/engines/gigablast.py            | 78 ++++++++++++++++++++++++++
 searx/settings.yml                    |  4 ++
 searx/tests/engines/test_gigablast.py | 77 +++++++++++++++++++++++++
 searx/tests/test_engines.py           |  1 +
 4 files changed, 160 insertions(+)
 create mode 100644 searx/engines/gigablast.py
 create mode 100644 searx/tests/engines/test_gigablast.py

diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
new file mode 100644
index 000000000..8749c3256
--- /dev/null
+++ b/searx/engines/gigablast.py
@@ -0,0 +1,78 @@
+## Gigablast (Web)
+#
+# @website     http://gigablast.com
+# @provide-api yes (http://gigablast.com/api.html)
+#
+# @using-api   yes
+# @results     XML
+# @stable      yes
+# @parse       url, title, content
+
+from urllib import urlencode
+from cgi import escape
+from lxml import etree
+
+# engine dependent config
+categories = ['general']
+paging = True
+number_of_results = 5
+
+# search-url
+base_url = 'http://gigablast.com/'
+search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0'
+
+# specific xpath variables
+results_xpath = '//response//result'
+url_xpath = './/url'
+title_xpath = './/title'
+content_xpath = './/sum'
+
+
+# get text of the first node matching xpath ('' if missing or empty)
+def _extract_text(node, xpath):
+    matches = node.xpath(xpath)
+    if not matches:
+        return ''
+    return matches[0].text or ''
+
+
+# do search-request
+def request(query, params):
+    # page 1 maps to offset 0
+    offset = (params['pageno'] - 1) * number_of_results
+
+    search_path = search_string.format(
+        query=urlencode({'q': query}),
+        offset=offset,
+        number_of_results=number_of_results)
+
+    params['url'] = base_url + search_path
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    dom = etree.fromstring(resp.content)
+
+    # parse results
+    for result in dom.xpath(results_xpath):
+        url = _extract_text(result, url_xpath)
+
+        # a result without url cannot be linked, skip it
+        if not url:
+            continue
+
+        # title and summary may be missing or empty
+        title = _extract_text(result, title_xpath)
+        content = escape(_extract_text(result, content_xpath))
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'content': content})
+
+    # return results
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 8c9941b36..98dadef3d 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -103,6 +103,10 @@ engines:
     shortcut : gf
     disabled : True
 
+  - name : gigablast
+    engine : gigablast
+    shortcut : gb
+
   - name : github
     engine : github
     shortcut : gh
diff --git a/searx/tests/engines/test_gigablast.py b/searx/tests/engines/test_gigablast.py
new file mode 100644
index 000000000..38264913f
--- /dev/null
+++ b/searx/tests/engines/test_gigablast.py
@@ -0,0 +1,77 @@
+from collections import defaultdict
+import mock
+from searx.engines import gigablast
+from searx.testing import SearxTestCase
+
+
+class TestGigablastEngine(SearxTestCase):
+
+    def test_request(self):
+        query = 'test_query'
+        dicto = defaultdict(dict)
+        dicto['pageno'] = 1
+        params = gigablast.request(query, dicto)
+        self.assertTrue('url' in params)
+        self.assertTrue(query in params['url'])
+        self.assertTrue('gigablast.com' in params['url'])
+        self.assertTrue('&s=0&' in params['url'])
+
+    def test_response(self):
+        self.assertRaises(AttributeError, gigablast.response, None)
+        self.assertRaises(AttributeError, gigablast.response, [])
+        self.assertRaises(AttributeError, gigablast.response, '')
+        self.assertRaises(AttributeError, gigablast.response, '[]')
+
+        response = mock.Mock(content='<response></response>')
+        self.assertEqual(gigablast.response(response), [])
+
+        response = mock.Mock(content='<response></response>')
+        self.assertEqual(gigablast.response(response), [])
+
+        xml = """
+        <response>
+            <hits>5941888</hits>
+            <moreResultsFollow>1</moreResultsFollow>
+            <result>
+                <title><![CDATA[This should be the title]]></title>
+                <sum><![CDATA[This should be the content.]]></sum>
+                <url><![CDATA[http://this.should.be.the.link/]]></url>
+                <size>90.5</size>
+                <docId>145414002633</docId>
+                <siteId>2660021087</siteId>
+                <domainId>2660021087</domainId>
+                <spidered>1320519373</spidered>
+                <indexed>1320519373</indexed>
+                <pubdate>4294967295</pubdate>
+                <isModDate>0</isModDate>
+                <language><![CDATA[English]]></language>
+                <charset><![CDATA[UTF-8]]></charset>
+            </result>
+        </response>
+        """
+        response = mock.Mock(content=xml)
+        results = gigablast.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['title'], 'This should be the title')
+        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
+        self.assertEqual(results[0]['content'], 'This should be the content.')
+
+    def test_response_incomplete_result(self):
+        xml = """
+        <response>
+            <result>
+                <title><![CDATA[Title only]]></title>
+                <sum></sum>
+                <url><![CDATA[http://this.should.be.the.link/]]></url>
+            </result>
+            <result>
+                <title><![CDATA[No url]]></title>
+            </result>
+        </response>
+        """
+        response = mock.Mock(content=xml)
+        results = gigablast.response(response)
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0]['title'], 'Title only')
+        self.assertEqual(results[0]['content'], '')
diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py
index ff8185b1e..30f2d0912 100644
--- a/searx/tests/test_engines.py
+++ b/searx/tests/test_engines.py
@@ -9,6 +9,7 @@ from searx.tests.engines.test_digg import *  # noqa
 from searx.tests.engines.test_dummy import *  # noqa
 from searx.tests.engines.test_flickr import *  # noqa
 from searx.tests.engines.test_flickr_noapi import *  # noqa
+from searx.tests.engines.test_gigablast import *  # noqa
 from searx.tests.engines.test_github import *  # noqa
 from searx.tests.engines.test_www1x import *  # noqa
 from searx.tests.engines.test_google_images import *  # noqa

From dd4686a3886458f600427aba0ed7b9666b3644db Mon Sep 17 00:00:00 2001
From: Thomas Pointhuber
Date: Sun, 8 Feb 2015 14:49:46 +0100
Subject: [PATCH 2/2] [enh] add blekko_images engine

---
 searx/engines/blekko_images.py            | 60 ++++++++++++++++++++++
 searx/settings.yml                        |  5 ++
 searx/tests/engines/test_blekko_images.py | 65 ++++++++++++++++++++++++
 searx/tests/test_engines.py               |  1 +
 4 files changed, 131 insertions(+)
 create mode 100644 searx/engines/blekko_images.py
 create mode 100644 searx/tests/engines/test_blekko_images.py

diff --git a/searx/engines/blekko_images.py b/searx/engines/blekko_images.py
new file mode 100644
index 000000000..2bae9c35e
--- /dev/null
+++ b/searx/engines/blekko_images.py
@@ -0,0 +1,60 @@
+## Blekko (Images)
+#
+# @website     https://blekko.com
+# @provide-api yes (unofficial)
+#
+# @using-api   yes
+# @results     JSON
+# @stable      yes
+# @parse       url, title, img_src
+
+from json import loads
+from urllib import urlencode
+
+# engine dependent config
+categories = ['images']
+paging = True
+# step of the 'c' query parameter between two consecutive pages
+page_size = 48
+
+# search-url
+base_url = 'https://blekko.com'
+search_url = '/api/images?{query}&c={c}'
+
+
+# do search-request
+def request(query, params):
+    # page 1 maps to offset 0
+    c = (params['pageno'] - 1) * page_size
+
+    params['url'] = base_url +\
+        search_url.format(query=urlencode({'q': query}),
+                          c=c)
+
+    # pages after the first additionally carry 'page' (pageno - 1)
+    if params['pageno'] != 1:
+        params['url'] += '&page={pageno}'.format(pageno=(params['pageno']-1))
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    search_results = loads(resp.text)
+
+    # return empty array if there are no results
+    if not search_results:
+        return []
+
+    for result in search_results:
+        # append result
+        results.append({'url': result['page_url'],
+                        'title': result['title'],
+                        'content': '',
+                        'img_src': result['url'],
+                        'template': 'images.html'})
+
+    # return results
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 98dadef3d..d7f7b96c0 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -33,6 +33,11 @@ engines:
     locale : en-US
     shortcut : bin
 
+  - name : blekko images
+    engine : blekko_images
+    locale : en-US
+    shortcut : bli
+
   - name : btdigg
     engine : btdigg
     shortcut : bt
diff --git a/searx/tests/engines/test_blekko_images.py b/searx/tests/engines/test_blekko_images.py
new file mode 100644
index 000000000..6a5388aae
--- /dev/null
+++ b/searx/tests/engines/test_blekko_images.py
@@ -0,0 +1,65 @@
+from collections import defaultdict
+import mock
+from searx.engines import blekko_images
+from searx.testing import SearxTestCase
+
+
+class TestBlekkoImagesEngine(SearxTestCase):
+
+    def test_request(self):
+        query = 'test_query'
+        dicto = defaultdict(dict)
+        dicto['pageno'] = 1
+        params = blekko_images.request(query, dicto)
+        self.assertTrue('url' in params)
+        self.assertTrue(query in params['url'])
+        self.assertTrue('blekko.com' in params['url'])
+
+    def test_response(self):
+        self.assertRaises(AttributeError, blekko_images.response, None)
+        self.assertRaises(AttributeError, blekko_images.response, [])
+        self.assertRaises(AttributeError, blekko_images.response, '')
+        self.assertRaises(AttributeError, blekko_images.response, '[]')
+
+        response = mock.Mock(text='[]')
+        self.assertEqual(blekko_images.response(response), [])
+
+        json = """
+        [
+            {
+                "c": 1,
+                "page_url": "http://result_url.html",
+                "title": "Photo title",
+                "tn_url": "http://ts1.mm.bing.net/th?id=HN.608050619474382748&pid=15.1",
+                "url": "http://result_image.jpg"
+            },
+            {
+                "c": 2,
+                "page_url": "http://companyorange.simpsite.nl/OSM",
+                "title": "OSM",
+                "tn_url": "http://ts2.mm.bing.net/th?id=HN.608048068264919461&pid=15.1",
+                "url": "http://simpsite.nl/userdata2/58985/Home/OSM.bmp"
+            },
+            {
+                "c": 3,
+                "page_url": "http://invincible.webklik.nl/page/osm",
+                "title": "OSM",
+                "tn_url": "http://ts1.mm.bing.net/th?id=HN.608024514657649476&pid=15.1",
+                "url": "http://www.webklik.nl/user_files/2009_09/65324/osm.gif"
+            },
+            {
+                "c": 4,
+                "page_url": "http://www.offshorenorway.no/event/companyDetail/id/12492",
+                "title": "Go to OSM Offshore AS homepage",
+                "tn_url": "http://ts2.mm.bing.net/th?id=HN.608054265899847285&pid=15.1",
+                "url": "http://www.offshorenorway.no/firmalogo/OSM-logo.png"
+            }
+        ]
+        """
+        response = mock.Mock(text=json)
+        results = blekko_images.response(response)
+        self.assertEqual(type(results), list)
+        self.assertEqual(len(results), 4)
+        self.assertEqual(results[0]['title'], 'Photo title')
+        self.assertEqual(results[0]['url'], 'http://result_url.html')
+        self.assertEqual(results[0]['img_src'], 'http://result_image.jpg')
diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py
index 30f2d0912..651da6dc2 100644
--- a/searx/tests/test_engines.py
+++ b/searx/tests/test_engines.py
@@ -1,6 +1,7 @@
 from searx.tests.engines.test_bing import *  # noqa
 from searx.tests.engines.test_bing_images import *  # noqa
 from searx.tests.engines.test_bing_news import *  # noqa
+from searx.tests.engines.test_blekko_images import *  # noqa
 from searx.tests.engines.test_btdigg import *  # noqa
 from searx.tests.engines.test_dailymotion import *  # noqa
 from searx.tests.engines.test_deezer import *  # noqa