From 2a788c8f29f63bc069436f1a12343a47d66f2523 Mon Sep 17 00:00:00 2001
From: asciimoo
Date: Fri, 31 Jan 2014 04:35:23 +0100
Subject: [PATCH] [enh] search language support init
---
searx/engines/__init__.py | 12 ++++++-
searx/engines/bing.py | 12 +++++--
searx/engines/google.py | 13 ++++---
searx/engines/wikipedia.py | 30 ++++++++++++++++
searx/languages.py | 59 ++++++++++++++++++++++++++++++++
searx/settings.yml | 3 +-
searx/static/css/style.css | 3 +-
searx/templates/preferences.html | 11 ++++++
searx/webapp.py | 32 +++++++++++++++--
9 files changed, 162 insertions(+), 13 deletions(-)
create mode 100644 searx/engines/wikipedia.py
create mode 100644 searx/languages.py
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 73a63f0a3..ac0c13c76 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -53,8 +53,14 @@ if not 'engines' in settings or not settings['engines']:
for engine_data in settings['engines']:
engine_name = engine_data['engine']
engine = load_module(engine_name + '.py')
+
if not hasattr(engine, 'paging'):
engine.paging = False
+
+ if not hasattr(engine, 'language_support'):
+ # default True: search()'s language check never skips such engines
+ engine.language_support = True
+
for param_name in engine_data:
if param_name == 'engine':
continue
@@ -158,7 +164,7 @@ def score_results(results):
return sorted(results, key=itemgetter('score'), reverse=True)
-def search(query, request, selected_engines, pageno=1):
+def search(query, request, selected_engines, pageno=1, lang='all'):
global engines, categories, number_of_searches
requests = []
results = {}
@@ -176,11 +182,15 @@ def search(query, request, selected_engines, pageno=1):
if pageno > 1 and not engine.paging:
continue
+ if lang != 'all' and not engine.language_support:
+ continue
+
request_params = default_request_params()
request_params['headers']['User-Agent'] = user_agent
request_params['category'] = selected_engine['category']
request_params['started'] = datetime.now()
request_params['pageno'] = pageno
+ request_params['language'] = lang
request_params = engine.request(query, request_params)
callback = make_callback(
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index ec8a0c44c..00f66b118 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -4,16 +4,22 @@ from cgi import escape
base_url = 'http://www.bing.com/'
search_string = 'search?{query}&first={offset}'
-locale = 'en-US' # see http://msdn.microsoft.com/en-us/library/dd251064.aspx
-
paging = True
+language_support = True
def request(query, params):
offset = (params['pageno'] - 1) * 10 + 1
+ if params['language'] == 'all':
+ language = 'en-US'
+ else:
+ language = params['language'].replace('_', '-')
search_path = search_string.format(
- query=urlencode({'q': query, 'setmkt': locale}),
+ query=urlencode({'q': query, 'setmkt': language}),
offset=offset)
+
+ params['cookies']['SRCHHPGUSR'] = \
+ 'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0]
#if params['category'] == 'images':
# params['url'] = base_url + 'images/' + search_path
params['url'] = base_url + search_path
diff --git a/searx/engines/google.py b/searx/engines/google.py
index b90c7adc5..2c6a98af3 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -5,16 +5,21 @@ from json import loads
categories = ['general']
-paging = True
-
url = 'https://ajax.googleapis.com/'
-search_url = url + 'ajax/services/search/web?v=1.0&start={offset}&rsz=large&safe=off&filter=off&{query}' # noqa
+search_url = url + 'ajax/services/search/web?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa
+
+paging = True
+language_support = True
def request(query, params):
offset = (params['pageno'] - 1) * 8
+ language = 'en-US'  # kept when params['language'] is 'all'
+ if params['language'] != 'all':
+ language = params['language'].replace('_', '-')  # 'en_US' -> 'en-US'
params['url'] = search_url.format(offset=offset,
- query=urlencode({'q': query}))
+ query=urlencode({'q': query}),
+ language=language)
return params
diff --git a/searx/engines/wikipedia.py b/searx/engines/wikipedia.py
new file mode 100644
index 000000000..1e2a798cc
--- /dev/null
+++ b/searx/engines/wikipedia.py
@@ -0,0 +1,49 @@
+"""Wikipedia engine: searches one language edition via its MediaWiki API."""
+from json import loads
+from urllib import urlencode, quote
+
+# {language} is the wiki subdomain, e.g. 'en'
+url = 'https://{language}.wikipedia.org/'
+
+search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json&sroffset={offset}' # noqa
+
+# page size; settings.yml lists number_of_results for this engine, so it is
+# presumably overridden at load time -- TODO confirm in the engine loader
+number_of_results = 10
+
+language_support = True
+
+
+def request(query, params):
+    """Set params['url'] to the search API call for the requested page.
+
+    params['language'] is overwritten with the wiki subdomain that was
+    chosen; response() builds result links from
+    resp.search_params['language'], presumably this same value.
+    """
+    limit = int(number_of_results)
+    if params['language'] == 'all':
+        # no specific language requested: use the English edition
+        language = 'en'
+    else:
+        # 'de_DE' -> 'de'
+        language = params['language'].split('_')[0]
+    params['language'] = language
+    # srlimit and sroffset both follow number_of_results, so consecutive
+    # pages neither skip nor over-fetch results when it is not 10
+    params['url'] = search_url.format(
+        query=urlencode({'srsearch': query, 'srlimit': limit}),
+        offset=(params['pageno'] - 1) * limit,
+        language=language)
+    return params
+
+
+def response(resp):
+    """Return a list of {'url', 'title'} dicts from the JSON API answer."""
+    hits = loads(resp.text).get('query', {}).get('search', [])
+    results = []
+    for hit in hits[:int(number_of_results)]:
+        # article URLs use the title with spaces turned into underscores
+        wiki = url.format(language=resp.search_params['language'])
+        page = quote(hit['title'].replace(' ', '_').encode('utf-8'))
+        results.append({'url': wiki + 'wiki/' + page, 'title': hit['title']})
+    return results
diff --git a/searx/languages.py b/searx/languages.py
new file mode 100644
index 000000000..8b12e5ffe
--- /dev/null
+++ b/searx/languages.py
@@ -0,0 +1,61 @@
+# (locale code, language name, country/region name) triples,
+# ordered by locale code
+language_codes = (
+    ("ar_XA", "Arabic", "Arabia"),
+    ("bg_BG", "Bulgarian", "Bulgaria"),
+    ("cs_CZ", "Czech", "Czech Republic"),
+    ("da_DK", "Danish", "Denmark"),
+    ("de_AT", "German", "Austria"),
+    ("de_CH", "German", "Switzerland"),
+    ("de_DE", "German", "Germany"),
+    ("el_GR", "Greek", "Greece"),
+    ("en_AU", "English", "Australia"),
+    ("en_CA", "English", "Canada"),
+    ("en_GB", "English", "United Kingdom"),
+    ("en_ID", "English", "Indonesia"),
+    ("en_IE", "English", "Ireland"),
+    ("en_IN", "English", "India"),
+    ("en_MY", "English", "Malaysia"),
+    ("en_NZ", "English", "New Zealand"),
+    ("en_PH", "English", "Philippines"),
+    ("en_SG", "English", "Singapore"),
+    ("en_US", "English", "United States"),
+    ("en_XA", "English", "Arabia"),
+    ("en_ZA", "English", "South Africa"),
+    ("es_AR", "Spanish", "Argentina"),
+    ("es_CL", "Spanish", "Chile"),
+    ("es_ES", "Spanish", "Spain"),
+    ("es_MX", "Spanish", "Mexico"),
+    ("es_US", "Spanish", "United States"),
+    ("es_XL", "Spanish", "Latin America"),
+    ("et_EE", "Estonian", "Estonia"),
+    ("fi_FI", "Finnish", "Finland"),
+    ("fr_BE", "French", "Belgium"),
+    ("fr_CA", "French", "Canada"),
+    ("fr_CH", "French", "Switzerland"),
+    ("fr_FR", "French", "France"),
+    ("he_IL", "Hebrew", "Israel"),
+    ("hr_HR", "Croatian", "Croatia"),
+    ("hu_HU", "Hungarian", "Hungary"),
+    ("it_IT", "Italian", "Italy"),
+    ("ja_JP", "Japanese", "Japan"),
+    ("ko_KR", "Korean", "Korea"),
+    ("lt_LT", "Lithuanian", "Lithuania"),
+    ("lv_LV", "Latvian", "Latvia"),
+    ("nb_NO", "Norwegian", "Norway"),
+    ("nl_BE", "Dutch", "Belgium"),
+    ("nl_NL", "Dutch", "Netherlands"),
+    ("pl_PL", "Polish", "Poland"),
+    ("pt_BR", "Portuguese", "Brazil"),
+    ("pt_PT", "Portuguese", "Portugal"),
+    ("ro_RO", "Romanian", "Romania"),
+    ("ru_RU", "Russian", "Russia"),
+    ("sk_SK", "Slovak", "Slovak Republic"),
+    ("sl_SL", "Slovenian", "Slovenia"),  # NOTE(review): ISO 3166 uses SI
+    ("sv_SE", "Swedish", "Sweden"),
+    ("th_TH", "Thai", "Thailand"),
+    ("tr_TR", "Turkish", "Turkey"),
+    ("uk_UA", "Ukrainian", "Ukraine"),
+    ("zh_CN", "Chinese", "China"),
+    ("zh_HK", "Chinese", "Hong Kong SAR"),
+    ("zh_TW", "Chinese", "Taiwan"))
diff --git a/searx/settings.yml b/searx/settings.yml
index 357dabd5e..c07286aed 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -7,8 +7,7 @@ server:
engines:
- name : wikipedia
- engine : mediawiki
- url : https://en.wikipedia.org/
+ engine : wikipedia
number_of_results : 1
paging : False
diff --git a/searx/static/css/style.css b/searx/static/css/style.css
index 911115159..a98a7af1a 100644
--- a/searx/static/css/style.css
+++ b/searx/static/css/style.css
@@ -152,7 +152,7 @@ tr:hover td { background: #DDDDDD; }
#results { margin: 10px; padding: 0; margin-bottom: 20px; }
#sidebar { position: absolute; left: 54em; width: 12em; margin: 0 2px 5px 5px; padding: 0 2px 2px 2px; }
-#suggestions span { display: block; margin: 0 2px 10px 2px; padding: 0; }
+#suggestions span { display: block; margin: 0 2px 2px 2px; padding: 0; }
#suggestions form { display: block; }
#suggestions input { padding: 2px 6px; margin: 2px 4px; font-size: 0.8em; display: inline-block; background: #3498DB; color: #FFFFFF; border-radius: 4px; border: 0; cursor: pointer; }
@@ -177,6 +177,7 @@ tr:hover td { background: #DDDDDD; }
}
#apis {
+ margin-top: 8px;
clear: both;
}
diff --git a/searx/templates/preferences.html b/searx/templates/preferences.html
index caa957052..249cebe63 100644
--- a/searx/templates/preferences.html
+++ b/searx/templates/preferences.html
@@ -11,6 +11,17 @@
{% include 'categories.html' %}
+