From 510aba5e6699f76e5b9dc32db18b0f19db6e5da4 Mon Sep 17 00:00:00 2001 From: Thomas Pointhuber Date: Wed, 1 Oct 2014 17:18:18 +0200 Subject: [PATCH] implement query parser and use it inside autocompletion --- searx/query.py | 125 ++++++++++++++++++++++++++++++++++++++++++++++++ searx/webapp.py | 30 ++++++++++-- 2 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 searx/query.py diff --git a/searx/query.py b/searx/query.py new file mode 100644 index 000000000..59a1e347b --- /dev/null +++ b/searx/query.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python + +''' +searx is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +searx is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with searx. If not, see < http://www.gnu.org/licenses/ >. 
class Query(object):
    """Split a raw search query into modifier tokens and the search text.

    Whitespace-separated tokens at the start of the query that look like
    ``:language`` or ``!engine`` / ``!category`` and match a known
    language, engine shortcut, engine name or category are treated as
    modifiers.  The first token that is not a recognised modifier starts
    the search text, which runs to the end of the query.

    After :meth:`parse_query` has run:

    * ``query_parts`` holds the modifier tokens and the whitespace between
      them as separate items, followed by the remaining text as one item;
      joining all items reproduces the original query.
    * ``engines`` holds one ``{'category': ..., 'name': ...}`` dict per
      selected engine.
    * ``languages`` holds the matched language tokens as typed
      (lower-cased, without the leading ``:``).
    """

    def __init__(self, query, blocked_engines):
        """Store the raw query and the engines that must not be selected.

        :param query: raw query string as entered by the user
        :param blocked_engines: list of engine names to ignore; any falsy
            value means that no engine is blocked
        """
        self.query = query
        self.blocked_engines = blocked_engines if blocked_engines else []

        self.query_parts = []
        self.engines = []
        self.languages = []

    def parse_query(self):
        """Parse ``self.query`` and fill the result attributes.

        All result attributes are reset first, so calling this method
        more than once does not accumulate duplicate entries.
        """
        self.query_parts = []
        self.engines = []
        self.languages = []

        # the capturing group keeps the whitespace runs in the result, so
        # the full query can be rebuilt from the parts
        raw_query_parts = re.split(r'(\s+)', self.query)

        # stays True while every token seen so far was a modifier
        parse_next = True

        for query_part in raw_query_parts:
            # re.split() yields '' for an empty query and next to leading
            # or trailing whitespace; such parts carry no information and
            # must not be indexed
            if not query_part:
                continue

            # the search text has started: everything else belongs to it
            if not parse_next:
                self.query_parts[-1] += query_part
                continue

            # whitespace between modifiers is kept as a part of its own
            if query_part.isspace():
                self.query_parts.append(query_part)
                continue

            parse_next = (self._parse_language(query_part)
                          or self._parse_engine(query_part))

            self.query_parts.append(query_part)

    def _parse_language(self, query_part):
        """Record a ``:language`` modifier; return True if it matched."""
        if not query_part.startswith(':'):
            return False

        lang = query_part[1:].lower()

        # a bare ':' would prefix-match every language code below
        if not lang:
            return False

        for lc in language_codes:
            lang_id, lang_name, country = [field.lower() for field in lc]

            # accept the exact code, a prefix of the code, the language
            # name or the country name
            if lang_id.startswith(lang) or lang in (lang_name, country):
                self.languages.append(lang)
                return True

        return False

    def _parse_engine(self, query_part):
        """Record a ``!engine``/``!category`` modifier; return True if it
        matched."""
        if not query_part.startswith('!'):
            return False

        # underscores stand in for spaces inside engine/category names
        prefix = query_part[1:].replace('_', ' ')

        # engine shortcut
        if prefix in engine_shortcuts\
           and engine_shortcuts[prefix] not in self.blocked_engines:
            self.engines.append({'category': 'none',
                                 'name': engine_shortcuts[prefix]})
            return True

        # engine name
        if prefix in engines and prefix not in self.blocked_engines:
            self.engines.append({'category': 'none',
                                 'name': prefix})
            return True

        # category name: select every engine declared under it, except
        # the blocked ones (blocked_engines holds names, not engines)
        if prefix in categories:
            self.engines.extend({'category': prefix,
                                 'name': engine.name}
                                for engine in categories[prefix]
                                if engine.name not in self.blocked_engines)
            return True

        return False

    def changeSearchQuery(self, search_query):
        """Replace the last query part (normally the search text) with
        ``search_query``; append it if there are no parts yet."""
        if self.query_parts:
            self.query_parts[-1] = search_query
        else:
            self.query_parts.append(search_query)

    def getSearchQuery(self):
        """Return the last query part (normally the search text), or ``''``
        if there are no parts."""
        if self.query_parts:
            return self.query_parts[-1]
        return ''

    def getFullQuery(self):
        """Return all query parts joined, including modifiers and the
        whitespace between them."""
        return ''.join(self.query_parts)
# parse results (write :language and !engine back to result string) + results = [] + for result in raw_results: + result_query = query + result_query.changeSearchQuery(result) + + # add parsed result + results.append(result_query.getFullQuery()) + + # return autocompleter results if request_data.get('format') == 'x-suggestions': return Response(json.dumps([query, results]), mimetype='application/json')