Merge branch 'master' of https://github.com/asciimoo/searx

2024-11-19 19:00:10 +01:00 · 2014-01-19 19:50:17 +01:00 · 2014-01-19 19:50:17 +01:00 · 9e72ebe064
commit 9e72ebe064
parent a62b94e21c 78f525aa94
13 changed files with 154 additions and 150 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,8 @@
 env
 engines.cfg
 .installed.cfg
 .coverage
 coverage/
 setup.cfg
 *.pyc
--- a/README.md
+++ b/README.md
@ -25,8 +25,7 @@ List of [running instances](https://github.com/asciimoo/searx/wiki/Searx-instanc
 * clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
 * install dependencies: `pip install -r requirements.txt`
-* edit your [searx/settings.py](https://github.com/asciimoo/searx/blob/master/searx/settings.py) (set your `secret_key`!)
+* edit your [settings.yml](https://github.com/asciimoo/searx/blob/master/settings.yml) (set your `secret_key`!)
 * rename `engines.cfg_sample` to `engines.cfg`
 * run `python searx/webapp.py` to start the application
 For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation)
--- a/engines.cfg_sample
+++ b/engines.cfg_sample
@ -1,99 +0,0 @@
 [wikipedia]
 engine = mediawiki
 url    = https://en.wikipedia.org/
 number_of_results = 1
 [bing]
 engine = bing
 locale = en-US
 [currency]
 engine=currency_convert
 categories = general
 [deviantart]
 engine = deviantart
 categories = images
 [ddg definitions]
 engine = duckduckgo_definitions
 [duckduckgo]
 engine = duckduckgo
 locale = en-us
 [filecrop]
 engine = filecrop
 categories = files
 [flickr]
 engine = flickr
 categories = images
 [github]
 engine = github
 categories = it
 [google]
 engine        = json_engine
 search_url    = https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
 categories    = general
 url_query     = /responseData/results/unescapedUrl
 content_query = /responseData/results/content
 title_query   = /responseData/results/titleNoFormatting
 [google images]
 engine = google_images
 categories = images
 [piratebay]
 engine = piratebay
 categories = videos, music, files
 [soundcloud]
 engine = soundcloud
 categories = music
 [stackoverflow]
 engine = stackoverflow
 categories = it
 [startpage]
 engine = startpage
 [twitter]
 engine = twitter
 categories = social media
 [urbandictionary]
 engine        = xpath
 search_url    = http://www.urbandictionary.com/define.php?term={query}
 url_xpath     = //div[@class="word"]//a/@href
 title_xpath   = //div[@class="word"]//a
 content_xpath = //div[@class="definition"]
 [yahoo]
 engine           = xpath
 search_url       = http://search.yahoo.com/search?p={query}
 results_xpath    = //div[@class="res"]
 url_xpath        = .//h3/a/@href
 title_xpath      = .//h3/a
 content_xpath    = .//div[@class="abstr"]
 suggestion_xpath = //div[@id="satat"]//a
 [youtube]
 engine = youtube
 categories = videos
 [dailymotion]
 engine = dailymotion
 locale = en_US
 categories = videos
 [vimeo]
 engine = vimeo
 categories = videos
 results_xpath = //div[@id="browse_content"]/ol/li
 url_xpath=./a/@href
 title_xpath=./a/div[@class="data"]/p[@class="title"]/text()
 content_xpath=./a/img/@src
--- a/requirements.txt
+++ b/requirements.txt
@ -1,3 +1,4 @@
 flask
 grequests
 lxml
 pyyaml
--- a/searx/__init__.py
+++ b/searx/__init__.py
@ -0,0 +1,22 @@
 from os import environ
 from os.path import realpath, dirname, join
 try:
    from yaml import load
 except:
    from sys import exit, stderr
    stderr.write('[E] install pyyaml\n')
    exit(2)
 searx_dir  = realpath(dirname(realpath(__file__))+'/../')
 engine_dir = dirname(realpath(__file__))
 if 'SEARX_SETTINGS_PATH' in environ:
    settings_path = environ['SEARX_SETTINGS_PATH']
 else:
    settings_path = join(searx_dir, 'settings.yml')
 with open(settings_path) as settings_yaml:
    settings = load(settings_yaml)
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@ -23,16 +23,12 @@ from itertools import izip_longest, chain
 from operator import itemgetter
 from urlparse import urlparse
 from searx import settings
-from searx.utils import get_useragent
+from searx.utils import gen_useragent
 import ConfigParser
 import sys
 from datetime import datetime
 engine_dir = dirname(realpath(__file__))
 searx_dir  = join(engine_dir, '../../')
 engines_config = ConfigParser.SafeConfigParser()
 engines_config.read(join(searx_dir, 'engines.cfg'))
 number_of_searches = 0
 engines = {}
@ -48,24 +44,23 @@ def load_module(filename):
    module.name = modname
    return module
-if not engines_config.sections():
+if not 'engines' in settings or not settings['engines']:
-    print '[E] Error no engines found. Edit your engines.cfg'
+    print '[E] Error no engines found. Edit your settings.yml'
    exit(2)
-for engine_config_name in engines_config.sections():
+for engine_data in settings['engines']:
-    engine_data = engines_config.options(engine_config_name)
+    engine_name = engine_data['engine']
-    engine = load_module(engines_config.get(engine_config_name, 'engine')+'.py')
+    engine = load_module(engine_name+'.py')
    engine.name = engine_config_name
    for param_name in engine_data:
        if param_name == 'engine':
            continue
        if param_name == 'categories':
-            if engines_config.get(engine_config_name, param_name) == 'none':
+            if engine_data['categories'] == 'none':
                engine.categories = []
            else:
-                engine.categories = map(str.strip, engines_config.get(engine_config_name, param_name).split(','))
+                engine.categories = map(str.strip, engine_data['categories'].split(','))
            continue
-        setattr(engine, param_name, engines_config.get(engine_config_name, param_name))
+        setattr(engine, param_name, engine_data[param_name])
    for engine_attr in dir(engine):
        if engine_attr.startswith('_'):
            continue
@ -118,8 +113,6 @@ def score_results(results):
        weight = 1.0
        if hasattr(engines[res['engine']], 'weight'):
            weight = float(engines[res['engine']].weight)
        elif res['engine'] in settings.weights:
            weight = float(settings.weights[res['engine']])
        score = int((flat_len - i)/engines_len)*weight+1
        duplicated = False
        for new_res in results:
@ -153,7 +146,7 @@ def search(query, request, selected_engines):
    suggestions = set()
    number_of_searches += 1
    #user_agent = request.headers.get('User-Agent', '')
-    user_agent = get_useragent()
+    user_agent = gen_useragent()
    for selected_engine in selected_engines:
        if selected_engine['name'] not in engines:
@ -172,7 +165,7 @@ def search(query, request, selected_engines):
        request_args = dict(headers = request_params['headers']
                           ,hooks   = dict(response=callback)
                           ,cookies = request_params['cookies']
-                           ,timeout = settings.request_timeout
+                           ,timeout = settings['server']['request_timeout']
                           )
        if request_params['method'] == 'GET':
--- a/searx/settings.py
+++ b/searx/settings.py
@ -1,16 +0,0 @@
 port = 8888
 secret_key = "ultrasecretkey" # change this!
 debug = True
 request_timeout = 5.0 # seconds
 weights = {} # 'search_engine_name': float(weight) | default is 1.0
 blacklist = [] # search engine blacklist
 categories = {} # custom search engine categories
 base_url = None # "https://your.domain.tld/" or None (to use request parameters)
--- a/searx/templates/about.html
+++ b/searx/templates/about.html
@ -10,7 +10,6 @@
    <ul>
        <li>Maybe Searx won’t offer you as personalised results as Google, but it doesn't make a profile about you</li>
        <li>Searx doesn't care about what you search, never shares anything with a third party, and it can't be used to compromise you</li>
        <li>Searx doesn't make money on ads and it isn't customised based on your interests. You get the pure search results</li>
        <li>Searx is a free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li>
    </ul>
    <p>If you do care about privacy, want to be a conscious user, moreover believe
--- a/searx/utils.py
+++ b/searx/utils.py
@ -5,7 +5,7 @@ import codecs
 import cStringIO
 import re
-def get_useragent():
+def gen_useragent():
    # TODO
    return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
--- a/searx/webapp.py
+++ b/searx/webapp.py
@ -22,12 +22,6 @@ import sys
 if __name__ == "__main__":
    sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../'))
 # first argument is for specifying settings module, used mostly by robot tests
 from sys import argv
 if len(argv) == 2:
    from importlib import import_module
    settings = import_module('searx.' + argv[1])
 else:
 from searx import settings
 from flask import Flask, request, render_template, url_for, Response, make_response, redirect
@ -41,7 +35,7 @@ from searx.utils import highlight_content, html_to_text
 app = Flask(__name__)
-app.secret_key = settings.secret_key
+app.secret_key = settings['server']['secret_key']
 opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
@ -58,8 +52,8 @@ opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
 def get_base_url():
-    if settings.base_url:
+    if settings['server']['base_url']:
-        hostname = settings.base_url
+        hostname = settings['server']['base_url']
    else:
        scheme = 'http'
        if request.is_secure:
@ -252,9 +246,9 @@ def run():
    from gevent import monkey
    monkey.patch_all()
-    app.run(debug        = settings.debug
+    app.run(debug        = settings['server']['debug']
-           ,use_debugger = settings.debug
+           ,use_debugger = settings['server']['debug']
-           ,port         = settings.port
+           ,port         = settings['server']['port']
           )
--- a/settings.yml
+++ b/settings.yml
@ -0,0 +1,107 @@
 server:
    port : 8888
    secret_key : "ultrasecretkey" # change this!
    debug : True
    request_timeout : 3.0 # seconds
    base_url: False
 engines:
  - name : wikipedia
    engine : mediawiki
    url    : https://en.wikipedia.org/
    number_of_results : 1
  - name : bing
    engine : bing
    locale : en-US
  - name : currency
    engine : currency_convert
    categories : general
  - name : deviantart
    engine : deviantart
    categories : images
  - name : ddg definitions
    engine : duckduckgo_definitions
  - name : duckduckgo
    engine : duckduckgo
    locale : en-us
  - name : filecrop
    engine : filecrop
    categories : files
  - name : flickr
    engine : flickr
    categories : images
  - name : github
    engine : github
    categories : it
  - name : google
    engine        : json_engine
    search_url    : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
    categories    : general
    url_query     : /responseData/results/unescapedUrl
    content_query : /responseData/results/content
    title_query   : /responseData/results/titleNoFormatting
  - name : google images
    engine : google_images
    categories : images
  - name : piratebay
    engine : piratebay
    categories : videos, music, files
  - name : soundcloud
    engine : soundcloud
    categories : music
  - name : stackoverflow
    engine : stackoverflow
    categories : it
  - name : startpage
    engine : startpage
  - name : twitter
    engine : twitter
    categories : social media
  - name : urbandictionary
    engine        : xpath
    search_url    : http://www.urbandictionary.com/define.php?term={query}
    url_xpath     : //div[@class="word"]//a/@href
    title_xpath   : //div[@class="word"]//a
    content_xpath : //div[@class="definition"]
  - name : yahoo
    engine           : xpath
    search_url       : http://search.yahoo.com/search?p={query}
    results_xpath    : //div[@class="res"]
    url_xpath        : .//h3/a/@href
    title_xpath      : .//h3/a
    content_xpath    : .//div[@class="abstr"]
    suggestion_xpath : //div[@id="satat"]//a
  - name : youtube
    engine : youtube
    categories : videos
  - name : dailymotion
    engine : dailymotion
    locale : en_US
    categories : videos
  - name : vimeo
    engine : vimeo
    categories : videos
    results_xpath : //div[@id="browse_content"]/ol/li
    url_xpath : ./a/@href
    title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
    content_xpath : ./a/img/@src
--- a/setup.py
+++ b/setup.py
@ -32,6 +32,7 @@ setup(
        'flask',
        'grequests',
        'lxml',
        'pyyaml',
        'setuptools',
    ],
    extras_require={
--- a/versions.cfg
+++ b/versions.cfg
@ -16,6 +16,7 @@ mccabe = 0.2.1
 pep8 = 1.4.6
 plone.testing = 4.0.8
 pyflakes = 0.7.3
 pyyaml = 3.10
 requests = 2.2.0
 robotframework-debuglibrary = 0.3
 robotframework-httplibrary = 0.4.2