From 55b30b5b494a59c62e9e7d1a5cb6fdbf8729f64f Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sun, 19 Sep 2021 09:10:02 +0000 Subject: [PATCH] [fix] searx_extra/update scripts: set_loggers(wikidata, 'wikidata') (#331) * [fix] searx_extra/update scripts: set_loggers(wikidata, 'wikidata') To test use:: ./manage pyenv.cmd searx_extra/update/update_currencies.py ./manage pyenv.cmd searx_extra/update/update_osm_keys_tags.py ./manage pyenv.cmd searx_extra/update/update_wikidata_units.py The script `update_engine_descriptions.py` seems to have some issues not related to this patch. ./manage pyenv.cmd python -m pip install -U pycld3 ./manage pyenv.cmd searx_extra/update/update_engine_descriptions.py Closes: https://github.com/searxng/searxng/issues/328 Signed-off-by: Markus Heiser --- searx_extra/update/update_currencies.py | 7 +++++-- .../update/update_engine_descriptions.py | 20 ++++++++++++------- searx_extra/update/update_osm_keys_tags.py | 9 ++++++--- searx_extra/update/update_wikidata_units.py | 7 ++++--- 4 files changed, 28 insertions(+), 15 deletions(-) diff --git a/searx_extra/update/update_currencies.py b/searx_extra/update/update_currencies.py index a572f4e9d..063801645 100755 --- a/searx_extra/update/update_currencies.py +++ b/searx_extra/update/update_currencies.py @@ -10,8 +10,9 @@ from os.path import realpath, dirname, join from searx import searx_dir from searx.locales import LOCALE_NAMES -from searx.engines.wikidata import send_wikidata_query +from searx.engines import wikidata, set_loggers +set_loggers(wikidata, 'wikidata') # ORDER BY (with all the query fields) is important to keep a deterministic result order # so multiple invokation of this script doesn't change currencies.json @@ -83,7 +84,9 @@ def add_currency_label(db, label, iso4217, language): def wikidata_request_result_iterator(request): - result = send_wikidata_query(request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)) + result = wikidata.send_wikidata_query( + request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL) + ) if result is not None: for r in result['results']['bindings']: yield r diff --git a/searx_extra/update/update_engine_descriptions.py b/searx_extra/update/update_engine_descriptions.py index 37be77177..2a05790da 100755 --- a/searx_extra/update/update_engine_descriptions.py +++ b/searx_extra/update/update_engine_descriptions.py @@ -6,13 +6,15 @@ from urllib.parse import quote, urlparse import detect_language from lxml.html import fromstring -from searx.engines.wikidata import send_wikidata_query +from searx.engines import wikidata, set_loggers from searx.utils import extract_text from searx.locales import LOCALE_NAMES import searx import searx.search import searx.network +set_loggers(wikidata, 'wikidata') + SPARQL_WIKIPEDIA_ARTICLE = """ SELECT DISTINCT ?item ?name WHERE { @@ -128,9 +130,11 @@ def initialize(): def fetch_wikidata_descriptions(): global IDS - result = send_wikidata_query(SPARQL_DESCRIPTION - .replace('%IDS%', IDS) - .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)) + result = wikidata.send_wikidata_query( + SPARQL_DESCRIPTION + .replace('%IDS%', IDS) + .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL) + ) if result is not None: for binding in result['results']['bindings']: wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '') @@ -143,9 +147,11 @@ def fetch_wikidata_descriptions(): def fetch_wikipedia_descriptions(): global IDS - result = send_wikidata_query(SPARQL_WIKIPEDIA_ARTICLE - .replace('%IDS%', IDS) - .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)) + result = wikidata.send_wikidata_query( + SPARQL_WIKIPEDIA_ARTICLE + .replace('%IDS%', IDS) + .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL) + ) if result is not None: for binding in result['results']['bindings']: wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '') diff --git a/searx_extra/update/update_osm_keys_tags.py b/searx_extra/update/update_osm_keys_tags.py index a71f2d9a1..be76dc40c 100755 --- a/searx_extra/update/update_osm_keys_tags.py +++ b/searx_extra/update/update_osm_keys_tags.py @@ -46,10 +46,13 @@ from pathlib import Path from searx import searx_dir from searx.network import set_timeout_for_thread -from searx.engines.wikidata import send_wikidata_query +from searx.engines import wikidata, set_loggers from searx.languages import language_codes from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK +set_loggers(wikidata, 'wikidata') + + SPARQL_TAGS_REQUEST = """ SELECT ?tag ?item ?itemLabel WHERE { ?item wdt:P1282 ?tag . @@ -96,7 +99,7 @@ def get_preset_keys(): def get_keys(): results = get_preset_keys() - response = send_wikidata_query(SPARQL_KEYS_REQUEST) + response = wikidata.send_wikidata_query(SPARQL_KEYS_REQUEST) for key in response['results']['bindings']: keys = key['key']['value'].split(':')[1:] @@ -144,7 +147,7 @@ def get_keys(): def get_tags(): results = collections.OrderedDict() - response = send_wikidata_query(SPARQL_TAGS_REQUEST) + response = wikidata.send_wikidata_query(SPARQL_TAGS_REQUEST) for tag in response['results']['bindings']: tag_names = tag['tag']['value'].split(':')[1].split('=') if len(tag_names) == 2: diff --git a/searx_extra/update/update_wikidata_units.py b/searx_extra/update/update_wikidata_units.py index 1e6b8b9ca..ddde4c135 100755 --- a/searx_extra/update/update_wikidata_units.py +++ b/searx_extra/update/update_wikidata_units.py @@ -7,13 +7,14 @@ import collections from os.path import join from searx import searx_dir -from searx.engines.wikidata import send_wikidata_query +from searx.engines import wikidata, set_loggers +set_loggers(wikidata, 'wikidata') # the response contains duplicate ?item with the different ?symbol # "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result # even if a ?item has different ?symbol of the same rank. -# A deterministic result +# A deterministic result # see: # * https://www.wikidata.org/wiki/Help:Ranking # * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section) @@ -36,7 +37,7 @@ ORDER BY ?item DESC(?rank) ?symbol def get_data(): results = collections.OrderedDict() - response = send_wikidata_query(SARQL_REQUEST) + response = wikidata.send_wikidata_query(SARQL_REQUEST) for unit in response['results']['bindings']: name = unit['item']['value'].replace('http://www.wikidata.org/entity/', '') unit = unit['symbol']['value']