2021-02-19 12:52:26 +01:00
|
|
|
#!/usr/bin/env python
|
2022-01-03 12:58:48 +01:00
|
|
|
# lint: pylint
|
2021-10-03 15:12:09 +02:00
|
|
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
2020-08-06 17:42:46 +02:00
|
|
|
|
2022-01-03 12:40:06 +01:00
|
|
|
"""Fetch currencies from :origin:`searx/engines/wikidata.py` engine.
|
|
|
|
|
|
|
|
Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ...
|
|
|
|
<.github/workflows/data-update.yml>`).
|
|
|
|
|
|
|
|
"""
|
2022-01-03 12:58:48 +01:00
|
|
|
|
|
|
|
# pylint: disable=invalid-name
|
|
|
|
|
2015-05-12 20:52:08 +02:00
|
|
|
import re
|
|
|
|
import unicodedata
|
2021-02-19 12:52:26 +01:00
|
|
|
import json
|
2016-07-08 18:43:28 +02:00
|
|
|
|
2021-02-19 12:52:26 +01:00
|
|
|
# set path
|
2022-01-03 12:58:48 +01:00
|
|
|
from os.path import join
|
2021-02-19 12:52:26 +01:00
|
|
|
|
2021-08-03 15:13:00 +02:00
|
|
|
from searx import searx_dir
|
2022-06-29 20:56:16 +02:00
|
|
|
from searx.locales import LOCALE_NAMES, locales_initialize
|
2021-09-19 11:10:02 +02:00
|
|
|
from searx.engines import wikidata, set_loggers
|
2021-02-19 12:52:26 +01:00
|
|
|
|
2021-09-19 11:10:02 +02:00
|
|
|
# NOTE(review): presumably attaches loggers (named 'wikidata') to the engine
# module, which is used here as a plain library — confirm against
# searx.engines.set_loggers.
set_loggers(wikidata, 'wikidata')
# Called before LOCALE_NAMES is read further down to build the language filter.
# NOTE(review): presumably this is what populates LOCALE_NAMES — confirm.
locales_initialize()
|
2021-02-19 12:52:26 +01:00
|
|
|
|
|
|
|
# ORDER BY (with all the query fields) is important to keep a deterministic result order
# so multiple invocations of this script don't change currencies.json
#
# The query binds ?iso4217 and ?label for every matching item and, when
# available, ?alias, ?unit and ?unicode.  The literal %LANGUAGES_SPARQL% is a
# placeholder that wikidata_request_result_iterator() replaces with the
# LANGUAGES_SPARQL value before the query is sent.
#
# NOTE(review): the constant is spelled SARQL (not SPARQL); the name is kept
# because fetch_db() refers to it.
SARQL_REQUEST = """
SELECT DISTINCT ?iso4217 ?unit ?unicode ?label ?alias WHERE {
?item wdt:P498 ?iso4217; rdfs:label ?label.
OPTIONAL { ?item skos:altLabel ?alias FILTER (LANG (?alias) = LANG(?label)). }
OPTIONAL { ?item wdt:P5061 ?unit. }
OPTIONAL { ?item wdt:P489 ?symbol.
?symbol wdt:P487 ?unicode. }
MINUS { ?item wdt:P582 ?end_data . } # Ignore monney with an end date
MINUS { ?item wdt:P31/wdt:P279* wd:Q15893266 . } # Ignore "former entity" (obsolete currency)
FILTER(LANG(?label) IN (%LANGUAGES_SPARQL%)).
}
ORDER BY ?iso4217 ?unit ?unicode ?label ?alias
"""
|
|
|
|
|
|
|
|
# ORDER BY (with all the query fields) is important to keep a deterministic result order
# so multiple invocations of this script don't change currencies.json
#
# The query binds ?iso4217 and ?article_name (the name of an article that is
# part of the "wikipedia" wiki group) for every matching item.  The literal
# %LANGUAGES_SPARQL% is a placeholder that wikidata_request_result_iterator()
# replaces with the LANGUAGES_SPARQL value before the query is sent.
SPARQL_WIKIPEDIA_NAMES_REQUEST = """
SELECT DISTINCT ?iso4217 ?article_name WHERE {
?item wdt:P498 ?iso4217 .
?article schema:about ?item ;
schema:name ?article_name ;
schema:isPartOf [ wikibase:wikiGroup "wikipedia" ]
MINUS { ?item wdt:P582 ?end_data . } # Ignore monney with an end date
MINUS { ?item wdt:P31/wdt:P279* wd:Q15893266 . } # Ignore "former entity" (obsolete currency)
FILTER(LANG(?article_name) IN (%LANGUAGES_SPARQL%)).
}
ORDER BY ?iso4217 ?article_name
"""
|
2015-05-12 20:52:08 +02:00
|
|
|
|
|
|
|
|
2021-08-03 15:13:00 +02:00
|
|
|
# Keys of LOCALE_NAMES (locale identifiers).
LANGUAGES = LOCALE_NAMES.keys()
# Comma-separated list of quoted language codes that is substituted for the
# %LANGUAGES_SPARQL% placeholder in the queries.  Only the part before the
# first '_' of each locale is kept; set() drops duplicates, which also means
# the order of the codes inside the string is arbitrary.
# NOTE(review): only '_' is treated as a separator — confirm LOCALE_NAMES keys
# never use '-' (e.g. 'pt-BR'), otherwise the region ends up in the filter.
LANGUAGES_SPARQL = ', '.join(set(map(lambda l: repr(l.split('_')[0]), LANGUAGES)))
|
2015-05-12 20:52:08 +02:00
|
|
|
|
|
|
|
|
2021-02-19 12:52:26 +01:00
|
|
|
def remove_accents(name):
    """Return *name* in Unicode NFKD form, lower-cased.

    Despite the function name, combining marks are not stripped: an accented
    character is only decomposed into its base character followed by the
    combining mark.
    """
    decomposed = unicodedata.normalize('NFKD', name)
    return decomposed.lower()
|
2015-05-12 20:52:08 +02:00
|
|
|
|
|
|
|
|
2021-02-19 12:52:26 +01:00
|
|
|
def remove_extra(name):
    """Cut *name* at the first ``(`` and then at the first ``:``.

    Whenever one of the delimiters is present, the text in front of it is
    kept and stripped of surrounding whitespace.  A name that contains
    neither delimiter is returned untouched (it is not stripped).
    """
    for delimiter in '(:':
        head, found, _ = name.partition(delimiter)
        if found:
            name = head.strip()
    return name
|
2015-05-12 20:52:08 +02:00
|
|
|
|
2016-07-08 18:43:28 +02:00
|
|
|
|
2021-02-19 12:52:26 +01:00
|
|
|
def _normalize_name(name):
    """Return the lookup form of a currency name.

    The name is lower-cased and passed through remove_accents(), hyphens
    become spaces, runs of spaces are collapsed to a single space and finally
    remove_extra() cuts off anything following ``(`` or ``:``.
    """
    folded = remove_accents(name.lower())
    spaced = folded.replace('-', ' ')
    collapsed = re.sub(' +', ' ', spaced)
    return remove_extra(collapsed)
|
2015-05-12 20:52:08 +02:00
|
|
|
|
2016-07-08 18:43:28 +02:00
|
|
|
|
2021-02-19 12:52:26 +01:00
|
|
|
def add_currency_name(db, name, iso4217, normalize_name=True):
    """Register *iso4217* as a currency code known under *name*.

    :param db: database dict; ``db['names']`` maps a name to a list of codes.
    :param name: currency name, alias, symbol or unit.
    :param iso4217: currency code to associate with the name.
    :param normalize_name: when true, *name* goes through _normalize_name()
        before it is used as key; when false it is used verbatim.

    A code that is already listed for the name is left where it is; a new
    code is put at the front of the list.
    """
    names = db['names']
    key = _normalize_name(name) if normalize_name else name

    codes = names.setdefault(key, [])
    if iso4217 in codes:
        return
    # the most recently added code gets the first position
    codes.insert(0, iso4217)
|
2016-07-08 18:43:28 +02:00
|
|
|
|
2015-05-12 20:52:08 +02:00
|
|
|
|
2021-02-19 12:52:26 +01:00
|
|
|
def add_currency_label(db, label, iso4217, language):
    """Store *label* as the name of currency *iso4217* in *language*.

    ``db['iso4217']`` maps a currency code to a ``{language: label}`` dict;
    the entry for the code is created on first use and an existing label for
    the same language is overwritten.
    """
    db['iso4217'].setdefault(iso4217, {})[language] = label
|
2015-05-12 20:52:08 +02:00
|
|
|
|
2016-07-08 18:43:28 +02:00
|
|
|
|
2021-02-19 12:52:26 +01:00
|
|
|
def wikidata_request_result_iterator(request):
    """Send *request* to Wikidata and yield the result bindings one by one.

    The ``%LANGUAGES_SPARQL%`` placeholder in *request* is replaced by
    LANGUAGES_SPARQL before the query is sent.  Nothing is yielded when
    send_wikidata_query() returns ``None``.
    """
    query = request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL)
    response = wikidata.send_wikidata_query(query)
    if response is None:
        return
    yield from response['results']['bindings']
|
2016-07-08 18:43:28 +02:00
|
|
|
|
|
|
|
|
2021-02-19 12:52:26 +01:00
|
|
|
def fetch_db():
    """Build the currency database from the two Wikidata queries.

    :returns: a dict with two keys: ``'names'`` (name -> list of currency
        codes, filled by add_currency_name()) and ``'iso4217'`` (currency
        code -> ``{language: label}``, filled by add_currency_label()).

    The Wikipedia article names are processed first, then the labels,
    aliases, symbols and units.  Because add_currency_name() puts new codes
    at the front of a list, the processing order determines the final order
    of the codes.
    """
    db = {'names': {}, 'iso4217': {}}

    # names of the Wikipedia articles about each currency
    for row in wikidata_request_result_iterator(SPARQL_WIKIPEDIA_NAMES_REQUEST):
        code = row['iso4217']['value']
        title = row['article_name']['value']
        title_lang = row['article_name']['xml:lang']
        add_currency_name(db, title, code)
        add_currency_label(db, title, code, title_lang)

    # labels, aliases, symbols and units; every field except the code is optional
    for row in wikidata_request_result_iterator(SARQL_REQUEST):
        code = row['iso4217']['value']

        if 'label' in row:
            text = row['label']['value']
            text_lang = row['label']['xml:lang']
            add_currency_name(db, text, code)
            add_currency_label(db, text, code, text_lang)

        if 'alias' in row:
            add_currency_name(db, row['alias']['value'], code)

        # symbols and units are stored verbatim, without name normalization
        for field in ('unicode', 'unit'):
            if field in row:
                add_currency_name(db, row[field]['value'], code, normalize_name=False)

    return db
|
2016-07-08 18:43:28 +02:00
|
|
|
|
|
|
|
|
2021-02-19 12:52:26 +01:00
|
|
|
def get_filename():
    """Return the path of ``data/currencies.json`` below ``searx_dir``."""
    return join(searx_dir, "data", "currencies.json")
|
2016-07-08 18:43:28 +02:00
|
|
|
|
2016-07-15 19:49:23 +02:00
|
|
|
|
2021-02-19 12:52:26 +01:00
|
|
|
def main():
    """Fetch the currency database, add the static names and write the JSON file.

    The result is written to the file returned by get_filename() as UTF-8
    JSON (non-ASCII characters kept as-is, indented by 4 spaces).
    """
    db = fetch_db()

    # static
    static_names = (
        ("euro", 'EUR'),
        ("euros", 'EUR'),
        ("dollar", 'USD'),
        ("dollars", 'USD'),
        ("peso", 'MXN'),
        ("pesos", 'MXN'),
    )
    for name, iso4217 in static_names:
        add_currency_name(db, name, iso4217)

    # reduce memory usage:
    # replace lists with one item by the item. see
    # searx.search.processors.online_currency.name_to_iso4217
    #
    # This has to stay after the last add_currency_name() call, since that
    # function calls .insert() on the stored value and so needs a list.
    names = db['names']
    for name, codes in names.items():
        if len(codes) == 1:
            names[name] = codes[0]

    with open(get_filename(), 'w', encoding='utf8') as f:
        json.dump(db, f, ensure_ascii=False, indent=4)
|
2015-05-12 20:52:08 +02:00
|
|
|
|
2021-12-27 09:26:22 +01:00
|
|
|
|
2021-02-19 12:52:26 +01:00
|
|
|
# Regenerate the data file only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|