From 046e1907213ac6317deec045735a0ca36ec8a136 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Sat, 18 Jan 2014 21:53:59 +0100 Subject: [PATCH 1/6] [mod] function name --- searx/engines/__init__.py | 4 ++-- searx/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 2bb0f4724..36857253a 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -23,7 +23,7 @@ from itertools import izip_longest, chain from operator import itemgetter from urlparse import urlparse from searx import settings -from searx.utils import get_useragent +from searx.utils import gen_useragent import ConfigParser import sys from datetime import datetime @@ -153,7 +153,7 @@ def search(query, request, selected_engines): suggestions = set() number_of_searches += 1 #user_agent = request.headers.get('User-Agent', '') - user_agent = get_useragent() + user_agent = gen_useragent() for selected_engine in selected_engines: if selected_engine['name'] not in engines: diff --git a/searx/utils.py b/searx/utils.py index 862185922..416055dfa 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -5,7 +5,7 @@ import codecs import cStringIO import re -def get_useragent(): +def gen_useragent(): # TODO return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0" From 39ebe1d5193bf62340bc101d51cab77df3c06f7e Mon Sep 17 00:00:00 2001 From: asciimoo Date: Sat, 18 Jan 2014 21:55:42 +0100 Subject: [PATCH 2/6] [fix] weight configurable via engines.cfg --- searx/engines/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 36857253a..5601ceb06 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -118,8 +118,6 @@ def score_results(results): weight = 1.0 if hasattr(engines[res['engine']], 'weight'): weight = float(engines[res['engine']].weight) - elif res['engine'] in settings.weights: - weight = 
float(settings.weights[res['engine']]) score = int((flat_len - i)/engines_len)*weight+1 duplicated = False for new_res in results: From 3afdd1d9941527e23cd7c05d2c15dd24a32de834 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Sun, 19 Jan 2014 00:17:02 +0100 Subject: [PATCH 3/6] [enh] settings unification - new dependency: pyyaml --- .gitignore | 2 + README.md | 3 +- engines.cfg_sample | 99 ----------------------------------- requirements.txt | 1 + searx/__init__.py | 22 ++++++++ searx/engines/__init__.py | 23 ++++---- searx/webapp.py | 12 ++--- settings.yml | 107 ++++++++++++++++++++++++++++++++++++++ setup.py | 1 + versions.cfg | 1 + 10 files changed, 150 insertions(+), 121 deletions(-) delete mode 100644 engines.cfg_sample create mode 100644 settings.yml diff --git a/.gitignore b/.gitignore index 7e6560890..ed5ed1624 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ env engines.cfg .installed.cfg +.coverage +covearge/ setup.cfg *.pyc diff --git a/README.md b/README.md index 1692de924..e6638cf74 100644 --- a/README.md +++ b/README.md @@ -25,8 +25,7 @@ List of [running instances](https://github.com/asciimoo/searx/wiki/Searx-instanc * clone source: `git clone git@github.com:asciimoo/searx.git && cd searx` * install dependencies: `pip install -r requirements.txt` -* edit your [searx/settings.py](https://github.com/asciimoo/searx/blob/master/searx/settings.py) (set your `secret_key`!) -* rename `engines.cfg_sample` to `engines.cfg` +* edit your [settings.yml](https://github.com/asciimoo/searx/blob/master/settings.yml) (set your `secret_key`!) 
* run `python searx/webapp.py` to start the application For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation) diff --git a/engines.cfg_sample b/engines.cfg_sample deleted file mode 100644 index ef9a32f34..000000000 --- a/engines.cfg_sample +++ /dev/null @@ -1,99 +0,0 @@ -[wikipedia] -engine = mediawiki -url = https://en.wikipedia.org/ -number_of_results = 1 - -[bing] -engine = bing -locale = en-US - -[currency] -engine=currency_convert -categories = general - -[deviantart] -engine = deviantart -categories = images - -[ddg definitions] -engine = duckduckgo_definitions - -[duckduckgo] -engine = duckduckgo -locale = en-us - -[filecrop] -engine = filecrop -categories = files - -[flickr] -engine = flickr -categories = images - -[github] -engine = github -categories = it - -[google] -engine = json_engine -search_url = https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query} -categories = general -url_query = /responseData/results/unescapedUrl -content_query = /responseData/results/content -title_query = /responseData/results/titleNoFormatting - -[google images] -engine = google_images -categories = images - -[piratebay] -engine = piratebay -categories = videos, music, files - -[soundcloud] -engine = soundcloud -categories = music - -[stackoverflow] -engine = stackoverflow -categories = it - -[startpage] -engine = startpage - -[twitter] -engine = twitter -categories = social media - -[urbandictionary] -engine = xpath -search_url = http://www.urbandictionary.com/define.php?term={query} -url_xpath = //div[@class="word"]//a/@href -title_xpath = //div[@class="word"]//a -content_xpath = //div[@class="definition"] - -[yahoo] -engine = xpath -search_url = http://search.yahoo.com/search?p={query} -results_xpath = //div[@class="res"] -url_xpath = .//h3/a/@href -title_xpath = .//h3/a -content_xpath = .//div[@class="abstr"] -suggestion_xpath = //div[@id="satat"]//a - 
-[youtube] -engine = youtube -categories = videos - -[dailymotion] -engine = dailymotion -locale = en_US -categories = videos - -[vimeo] -engine = vimeo -categories = videos -results_xpath = //div[@id="browse_content"]/ol/li -url_xpath=./a/@href -title_xpath=./a/div[@class="data"]/p[@class="title"]/text() -content_xpath=./a/img/@src diff --git a/requirements.txt b/requirements.txt index ed27ce9c8..2ff135f16 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ flask grequests lxml +pyyaml diff --git a/searx/__init__.py b/searx/__init__.py index e69de29bb..e313306e3 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -0,0 +1,22 @@ +from os import environ +from os.path import realpath, dirname, join +try: + from yaml import load +except: + from sys import exit, stderr + stderr.write('[E] install pyyaml\n') + exit(2) + + +searx_dir = realpath(dirname(realpath(__file__))+'/../') +engine_dir = dirname(realpath(__file__)) + +if 'SEARX_SETTINGS_PATH' in environ: + settings_path = environ['SEARX_SETTINGS_PATH'] +else: + settings_path = join(searx_dir, 'settings.yml') + + +with open(settings_path) as settings_yaml: + settings = load(settings_yaml) + diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 5601ceb06..457af4cda 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -24,15 +24,11 @@ from operator import itemgetter from urlparse import urlparse from searx import settings from searx.utils import gen_useragent -import ConfigParser import sys from datetime import datetime engine_dir = dirname(realpath(__file__)) -searx_dir = join(engine_dir, '../../') -engines_config = ConfigParser.SafeConfigParser() -engines_config.read(join(searx_dir, 'engines.cfg')) number_of_searches = 0 engines = {} @@ -48,24 +44,23 @@ def load_module(filename): module.name = modname return module -if not engines_config.sections(): - print '[E] Error no engines found. 
Edit your engines.cfg' +if not 'engines' in settings or not settings['engines']: + print '[E] Error no engines found. Edit your settings.yml' exit(2) -for engine_config_name in engines_config.sections(): - engine_data = engines_config.options(engine_config_name) - engine = load_module(engines_config.get(engine_config_name, 'engine')+'.py') - engine.name = engine_config_name +for engine_data in settings['engines']: + engine_name = engine_data['engine'] + engine = load_module(engine_name+'.py') for param_name in engine_data: if param_name == 'engine': continue if param_name == 'categories': - if engines_config.get(engine_config_name, param_name) == 'none': + if engine_data['categories'] == 'none': engine.categories = [] else: - engine.categories = map(str.strip, engines_config.get(engine_config_name, param_name).split(',')) + engine.categories = map(str.strip, engine_data['categories'].split(',')) continue - setattr(engine, param_name, engines_config.get(engine_config_name, param_name)) + setattr(engine, param_name, engine_data[param_name]) for engine_attr in dir(engine): if engine_attr.startswith('_'): continue @@ -170,7 +165,7 @@ def search(query, request, selected_engines): request_args = dict(headers = request_params['headers'] ,hooks = dict(response=callback) ,cookies = request_params['cookies'] - ,timeout = settings.request_timeout + ,timeout = settings['server']['request_timeout'] ) if request_params['method'] == 'GET': diff --git a/searx/webapp.py b/searx/webapp.py index 15ab17d15..de68d73e6 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -41,7 +41,7 @@ from searx.utils import highlight_content, html_to_text app = Flask(__name__) -app.secret_key = settings.secret_key +app.secret_key = settings['server']['secret_key'] opensearch_xml = ''' @@ -58,8 +58,8 @@ opensearch_xml = ''' def get_base_url(): - if settings.base_url: - hostname = settings.base_url + if settings['server']['base_url']: + hostname = settings['server']['base_url'] else: scheme = 'http' if 
request.is_secure: @@ -243,9 +243,9 @@ def run(): from gevent import monkey monkey.patch_all() - app.run(debug = settings.debug - ,use_debugger = settings.debug - ,port = settings.port + app.run(debug = settings['server']['debug'] + ,use_debugger = settings['server']['debug'] + ,port = settings['server']['port'] ) diff --git a/settings.yml b/settings.yml new file mode 100644 index 000000000..b7c82cc72 --- /dev/null +++ b/settings.yml @@ -0,0 +1,107 @@ +server: + port : 8888 + secret_key : "ultrasecretkey" # change this! + debug : True + request_timeout : 3.0 # seconds + base_url: False + +engines: + - name : wikipedia + engine : mediawiki + url : https://en.wikipedia.org/ + number_of_results : 1 + + - name : bing + engine : bing + locale : en-US + + - name : currency + engine : currency_convert + categories : general + + - name : deviantart + engine : deviantart + categories : images + + - name : ddg definitions + engine : duckduckgo_definitions + + - name : duckduckgo + engine : duckduckgo + locale : en-us + + - name : filecrop + engine : filecrop + categories : files + + - name : flickr + engine : flickr + categories : images + + - name : github + engine : github + categories : it + + - name : google + engine : json_engine + search_url : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query} + categories : general + url_query : /responseData/results/unescapedUrl + content_query : /responseData/results/content + title_query : /responseData/results/titleNoFormatting + + - name : google images + engine : google_images + categories : images + + - name : piratebay + engine : piratebay + categories : videos, music, files + + - name : soundcloud + engine : soundcloud + categories : music + + - name : stackoverflow + engine : stackoverflow + categories : it + + - name : startpage + engine : startpage + + - name : twitter + engine : twitter + categories : social media + + - name : urbandictionary + engine : xpath + 
search_url : http://www.urbandictionary.com/define.php?term={query} + url_xpath : //div[@class="word"]//a/@href + title_xpath : //div[@class="word"]//a + content_xpath : //div[@class="definition"] + + - name : yahoo + engine : xpath + search_url : http://search.yahoo.com/search?p={query} + results_xpath : //div[@class="res"] + url_xpath : .//h3/a/@href + title_xpath : .//h3/a + content_xpath : .//div[@class="abstr"] + suggestion_xpath : //div[@id="satat"]//a + + - name : youtube + engine : youtube + categories : videos + + - name : dailymotion + engine : dailymotion + locale : en_US + categories : videos + + - name : vimeo + engine : vimeo + categories : videos + results_xpath : //div[@id="browse_content"]/ol/li + url_xpath : ./a/@href + title_xpath : ./a/div[@class="data"]/p[@class="title"]/text() + content_xpath : ./a/img/@src diff --git a/setup.py b/setup.py index e1ef52489..5b3d589ab 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,7 @@ setup( 'flask', 'grequests', 'lxml', + 'pyyaml', 'setuptools', ], extras_require={ diff --git a/versions.cfg b/versions.cfg index 6294a6a73..dd1b610d1 100644 --- a/versions.cfg +++ b/versions.cfg @@ -16,6 +16,7 @@ mccabe = 0.2.1 pep8 = 1.4.6 plone.testing = 4.0.8 pyflakes = 0.7.3 +pyyaml = 3.10 requests = 2.2.0 robotframework-debuglibrary = 0.3 robotframework-httplibrary = 0.4.2 From 3f21dd56e3fbb469d61f47f21209f29f0e73945c Mon Sep 17 00:00:00 2001 From: asciimoo Date: Sun, 19 Jan 2014 00:33:16 +0100 Subject: [PATCH 4/6] [fix] unused settings.py removed --- searx/settings.py | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 searx/settings.py diff --git a/searx/settings.py b/searx/settings.py deleted file mode 100644 index 70b7a4514..000000000 --- a/searx/settings.py +++ /dev/null @@ -1,16 +0,0 @@ - -port = 8888 - -secret_key = "ultrasecretkey" # change this! 
- -debug = True - -request_timeout = 5.0 # seconds - -weights = {} # 'search_engine_name': float(weight) | default is 1.0 - -blacklist = [] # search engine blacklist - -categories = {} # custom search engine categories - -base_url = None # "https://your.domain.tld/" or None (to use request parameters) From f3749434f1545d802450dbed126e658b1643292f Mon Sep 17 00:00:00 2001 From: asciimoo Date: Sun, 19 Jan 2014 17:17:52 +0100 Subject: [PATCH 5/6] [fix] new settings import --- searx/webapp.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index de68d73e6..ec02e8f5c 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -22,13 +22,7 @@ import sys if __name__ == "__main__": sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../')) -# first argument is for specifying settings module, used mostly by robot tests -from sys import argv -if len(argv) == 2: - from importlib import import_module - settings = import_module('searx.' + argv[1]) -else: - from searx import settings +from searx import settings from flask import Flask, request, render_template, url_for, Response, make_response, redirect from searx.engines import search, categories, engines, get_engines_stats From 78f525aa94375dabd0e072a782430d5dd9702ca1 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Sun, 19 Jan 2014 17:41:00 +0100 Subject: [PATCH 6/6] [mod] about page mod --- searx/templates/about.html | 1 - 1 file changed, 1 deletion(-) diff --git a/searx/templates/about.html b/searx/templates/about.html index 931578b95..4e3f4bf4e 100644 --- a/searx/templates/about.html +++ b/searx/templates/about.html @@ -10,7 +10,6 @@
  • Searx may not offer results as personalised as Google's, but it doesn't build a profile about you
  • Searx doesn't care about what you search, never shares anything with a third party, and it can't be used to compromise you
  • -
  • Searx doesn't make money on ads and it isn't customised based on your interests. You get the pure search results
  • Searx is free software: the code is 100% open and you can help to make it better. See more on GitHub

If you do care about privacy, want to be a conscious user, moreover believe