mirror of https://github.com/searxng/searxng.git
Compare commits
9 Commits
7fc6ac283e
...
45f984dc82
Author | SHA1 | Date |
---|---|---|
Alexandre Flament | 45f984dc82 | |
Alexandre Flament | 890bc164ee | |
Alexandre Flament | b32ed46a54 | |
Alexandre Flament | 2923c9142e | |
Bnyro | 383d873597 | |
Markus Heiser | fb32425d78 | |
Bnyro | 72be98e12f | |
Markus Heiser | 742303d030 | |
Markus Heiser | 63cf80aae5 |
|
@ -0,0 +1,9 @@
|
|||
.. _unit converter plugin:
|
||||
|
||||
=====================
|
||||
Unit converter plugin
|
||||
=====================
|
||||
|
||||
.. automodule:: searx.plugins.unit_converter
|
||||
:members:
|
||||
|
|
@ -14,9 +14,12 @@ sphinx-tabs==3.4.5
|
|||
sphinxcontrib-programoutput==0.17
|
||||
sphinx-autobuild==2021.3.14
|
||||
sphinx-notfound-page==1.0.0
|
||||
myst-parser==2.0.0
|
||||
linuxdoc==20231020
|
||||
myst-parser==3.0.1
|
||||
linuxdoc==20240509
|
||||
aiounittest==1.4.2
|
||||
yamllint==1.35.1
|
||||
wlc==1.14
|
||||
coloredlogs==15.0.1
|
||||
docutils<=0.21; python_version == '3.8'
|
||||
docutils>=0.21.2; python_version > '3.8'
|
||||
|
||||
|
|
|
@ -7,21 +7,29 @@
|
|||
|
||||
__all__ = [
|
||||
'ENGINE_TRAITS',
|
||||
'CURRENCIES',
|
||||
'USER_AGENTS',
|
||||
'EXTERNAL_URLS',
|
||||
'WIKIDATA_UNITS',
|
||||
'EXTERNAL_BANGS',
|
||||
'OSM_KEYS_TAGS',
|
||||
'ENGINE_DESCRIPTIONS',
|
||||
'LOCALES',
|
||||
'ahmia_blacklist_loader',
|
||||
'fetch_engine_descriptions',
|
||||
'fetch_iso4217_from_user',
|
||||
'fetch_name_from_iso4217',
|
||||
'fetch_osm_key_label',
|
||||
]
|
||||
|
||||
import re
|
||||
import unicodedata
|
||||
import json
|
||||
import sqlite3
|
||||
from typing import Dict, List, Optional
|
||||
from functools import lru_cache
|
||||
from threading import local
|
||||
from pathlib import Path
|
||||
|
||||
data_dir = Path(__file__).parent
|
||||
data_connection_local = local()
|
||||
|
||||
|
||||
def _load(filename):
|
||||
|
@ -29,6 +37,108 @@ def _load(filename):
|
|||
return json.load(f)
|
||||
|
||||
|
||||
def _get_connection(filename: str) -> sqlite3.Connection:
    """Return a read-only SQLite connection to *filename*.

    The filename is interpreted relative to ``searx/data``.  Connections are
    cached per thread: repeated calls from the same thread hand back the
    connection opened by the first call for that filename.
    """
    cached = getattr(data_connection_local, filename, None)
    if cached is not None:
        return cached

    db_path = str(data_dir / filename)
    # ``mode=ro`` in the URI opens the database file read-only
    connection = sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
    setattr(data_connection_local, filename, connection)
    return connection
|
||||
|
||||
|
||||
def fetch_engine_descriptions(language) -> Dict[str, List[str]]:
    """Return, for each engine name, a ``[description, source]`` pair."""
    cursor = _get_connection("engine_descriptions.db").execute(
        "SELECT engine, description, source FROM engine_descriptions WHERE language=?", (language,)
    )
    descriptions: Dict[str, List[str]] = {}
    for engine, description, source in cursor.fetchall():
        descriptions[engine] = [description, source]
    return descriptions
|
||||
|
||||
|
||||
def _normalize_name(name):
|
||||
name = name.lower().replace('-', ' ').rstrip('s')
|
||||
name = re.sub(' +', ' ', name)
|
||||
return unicodedata.normalize('NFKD', name).lower()
|
||||
|
||||
|
||||
@lru_cache(10)
def fetch_iso4217_from_user(name: str) -> Optional[str]:
    """Resolve user input (an ISO 4217 code or a currency name) to an ISO
    4217 code; ``None`` when unknown or ambiguous."""
    connection = _get_connection("currencies.db")

    # first interpretation: the user typed an ISO 4217 code
    row = connection.execute(
        "SELECT iso4217 FROM currencies WHERE lower(iso4217)=? LIMIT 1", (name.lower(),)
    ).fetchone()
    if row:
        return row[0]

    # second interpretation: a currency name
    normalized = _normalize_name(name)
    rows = connection.execute("SELECT iso4217 FROM currencies WHERE name=?", (normalized,)).fetchall()
    codes = list({row[0] for row in rows})
    if len(codes) == 1:
        return codes[0]

    # zero or several distinct codes match the name --> return nothing
    return None
|
||||
|
||||
|
||||
@lru_cache(10)
def fetch_name_from_iso4217(iso4217: str, language: str) -> Optional[str]:
    """Return the currency name of *iso4217* in *language*; ``None`` when the
    lookup yields no result or more than one."""
    rows = _get_connection("currencies.db").execute(
        "SELECT name FROM currencies WHERE iso4217=? AND language=?", (iso4217, language)
    ).fetchall()
    names = [row[0] for row in rows]
    return names[0] if len(names) == 1 else None
|
||||
|
||||
|
||||
@lru_cache(100)
def fetch_osm_key_label(key_name: str, language: str) -> Optional[str]:
    """Return the label of the OSM key *key_name* in *language*, falling back
    to the primary language subtag and then to English; ``None`` when the key
    is unknown."""
    if key_name.startswith('currency:'):
        # currency:EUR --> get the name from the CURRENCIES variable
        # see https://wiki.openstreetmap.org/wiki/Key%3Acurrency
        # and for example https://taginfo.openstreetmap.org/keys/currency:EUR#values
        # but there is also currency=EUR (currently not handled)
        # https://taginfo.openstreetmap.org/keys/currency#values
        parts = key_name.split(':')
        if len(parts) > 1:
            label = fetch_name_from_iso4217(parts[1], language)
            # fall back to the raw ISO 4217 code when no name is known
            return label if label else parts[1]

    language = language.lower()
    language_short = language.split('-')[0]
    rows = _get_connection("osm_keys_tags.db").execute(
        "SELECT language, label FROM osm_keys WHERE name=? AND language in (?, ?, 'en')",
        (key_name, language, language_short),
    ).fetchall()
    labels = dict(rows)
    return labels.get(language) or labels.get(language_short) or labels.get('en')
|
||||
|
||||
|
||||
@lru_cache(100)
def fetch_osm_tag_label(tag_key: str, tag_value: str, language: str) -> Optional[str]:
    """Return the label of the OSM tag ``tag_key=tag_value`` in *language*,
    falling back to the primary language subtag and then to English; ``None``
    when the tag is unknown."""
    language = language.lower()
    language_short = language.split('-')[0]
    rows = _get_connection("osm_keys_tags.db").execute(
        "SELECT language, label FROM osm_tags WHERE tag_key=? AND tag_value=? AND language in (?, ?, 'en')",
        (tag_key, tag_value, language, language_short),
    ).fetchall()
    labels = dict(rows)
    return labels.get(language) or labels.get(language_short) or labels.get('en')
|
||||
|
||||
|
||||
def ahmia_blacklist_loader():
|
||||
"""Load data from `ahmia_blacklist.txt` and return a list of MD5 values of onion
|
||||
names. The MD5 values are fetched by::
|
||||
|
@ -42,12 +152,9 @@ def ahmia_blacklist_loader():
|
|||
return f.read().split()
|
||||
|
||||
|
||||
CURRENCIES = _load('currencies.json')
|
||||
USER_AGENTS = _load('useragents.json')
|
||||
EXTERNAL_URLS = _load('external_urls.json')
|
||||
WIKIDATA_UNITS = _load('wikidata_units.json')
|
||||
EXTERNAL_BANGS = _load('external_bangs.json')
|
||||
OSM_KEYS_TAGS = _load('osm_keys_tags.json')
|
||||
ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
|
||||
ENGINE_TRAITS = _load('engine_traits.json')
|
||||
LOCALES = _load('locales.json')
|
||||
|
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,3 @@
|
|||
Dumps of the SQLite files in ``searx.data``.
|
||||
|
||||
These files are not used by SearXNG, they are here for reference.
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -6,7 +6,7 @@ from time import time
|
|||
from urllib.parse import urlencode
|
||||
|
||||
from searx.network import get as http_get
|
||||
from searx.engines.openstreetmap import get_key_label
|
||||
from searx.data import fetch_osm_key_label
|
||||
|
||||
about = {
|
||||
"website": 'https://www.apple.com/maps/',
|
||||
|
@ -72,7 +72,7 @@ def response(resp):
|
|||
telephone = result['telephone']
|
||||
links.append(
|
||||
{
|
||||
'label': get_key_label('phone', user_language),
|
||||
'label': fetch_osm_key_label('phone', user_language),
|
||||
'url': 'tel:' + telephone,
|
||||
'url_label': telephone,
|
||||
}
|
||||
|
@ -81,7 +81,7 @@ def response(resp):
|
|||
url = result['urls'][0]
|
||||
links.append(
|
||||
{
|
||||
'label': get_key_label('website', user_language),
|
||||
'label': fetch_osm_key_label('website', user_language),
|
||||
'url': url,
|
||||
'url_label': url,
|
||||
}
|
||||
|
|
|
@ -10,7 +10,7 @@ from functools import partial
|
|||
|
||||
from flask_babel import gettext
|
||||
|
||||
from searx.data import OSM_KEYS_TAGS, CURRENCIES
|
||||
from searx.data import fetch_osm_tag_label, fetch_osm_key_label
|
||||
from searx.utils import searx_useragent
|
||||
from searx.external_urls import get_external_url
|
||||
from searx.engines.wikidata import send_wikidata_query, sparql_string_escape, get_thumbnail
|
||||
|
@ -187,14 +187,14 @@ def response(resp):
|
|||
'template': 'map.html',
|
||||
'title': title,
|
||||
'address': address,
|
||||
'address_label': get_key_label('addr', user_language),
|
||||
'address_label': fetch_osm_key_label('addr', user_language),
|
||||
'url': url,
|
||||
'osm': osm,
|
||||
'geojson': geojson,
|
||||
'img_src': img_src,
|
||||
'links': links,
|
||||
'data': data,
|
||||
'type': get_tag_label(result.get('category'), result.get('type', ''), user_language),
|
||||
'type': fetch_osm_tag_label(result.get('category'), result.get('type', ''), user_language),
|
||||
'type_icon': result.get('icon'),
|
||||
'content': '',
|
||||
'longitude': result['lon'],
|
||||
|
@ -367,7 +367,7 @@ def get_links(result, user_language):
|
|||
url_label = result.get('wikidata', {}).get('itemLabel') or url_label
|
||||
links.append(
|
||||
{
|
||||
'label': get_key_label(k, user_language),
|
||||
'label': fetch_osm_key_label(k, user_language),
|
||||
'url': url,
|
||||
'url_label': url_label,
|
||||
}
|
||||
|
@ -389,7 +389,7 @@ def get_data(result, user_language, ignore_keys):
|
|||
continue
|
||||
if get_key_rank(k) is None:
|
||||
continue
|
||||
k_label = get_key_label(k, user_language)
|
||||
k_label = fetch_osm_key_label(k, user_language)
|
||||
if k_label:
|
||||
data.append(
|
||||
{
|
||||
|
@ -412,51 +412,3 @@ def get_key_rank(k):
|
|||
# "payment:*" in KEY_ORDER matches "payment:cash", "payment:debit card", etc...
|
||||
key_rank = KEY_RANKS.get(k.split(':')[0] + ':*')
|
||||
return key_rank
|
||||
|
||||
|
||||
def get_label(labels, lang):
    """Get label from labels in OSM_KEYS_TAGS

    in OSM_KEYS_TAGS, labels have key == '*'

    Lookup order: the exact language tag, its primary subtag, ``'en'``, and
    finally the first entry of *labels*.  Returns ``None`` only when *labels*
    is empty.
    """
    tag_label = labels.get(lang.lower())
    if tag_label is None:
        # example: if 'zh-hk' is not found, check 'zh'
        tag_label = labels.get(lang.split('-')[0])
    if tag_label is None and lang != 'en':
        # example: if 'zh' is not found, check 'en'
        tag_label = labels.get('en')
    if tag_label is None and len(labels) > 0:
        # example: if still not found, use the first entry
        # bug fix: dict views are not subscriptable in Python 3 —
        # ``labels.values()[0]`` raised TypeError here
        tag_label = next(iter(labels.values()))
    return tag_label
|
||||
|
||||
|
||||
def get_tag_label(tag_category, tag_name, lang):
    """Get tag label from OSM_KEYS_TAGS"""
    if tag_name is None:
        # a missing tag name selects the empty-name entry
        tag_name = ''
    labels = OSM_KEYS_TAGS['tags'].get(tag_category, {}).get(tag_name, {})
    return get_label(labels, lang)
|
||||
|
||||
|
||||
def get_key_label(key_name, lang):
    """Get key label from OSM_KEYS_TAGS"""
    if key_name.startswith('currency:'):
        # currency:EUR --> get the name from the CURRENCIES variable
        # see https://wiki.openstreetmap.org/wiki/Key%3Acurrency
        # and for example https://taginfo.openstreetmap.org/keys/currency:EUR#values
        # but there is also currency=EUR (currently not handled)
        # https://taginfo.openstreetmap.org/keys/currency#values
        parts = key_name.split(':')
        if len(parts) > 1:
            names = CURRENCIES['iso4217'].get(parts[1])
            if names:
                return get_label(names, lang).lower()
            # fall back to the raw ISO 4217 code
            return parts[1]

    # walk the nested keys dict; the leaf labels live under the '*' key
    node = OSM_KEYS_TAGS['keys']
    for part in key_name.split(':') + ['*']:
        node = node.get(part)
        if node is None:
            return None
    return get_label(node, lang)
|
||||
|
|
|
@ -22,20 +22,26 @@ The engine has the following (additional) settings:
|
|||
- :py:obj:`search_mode`
|
||||
- :py:obj:`search_type`
|
||||
|
||||
The :py:obj:`base_url` has to be set in the engine named `yacy` and is used by
|
||||
all yacy engines.
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
- name: yacy
|
||||
engine: yacy
|
||||
categories: general
|
||||
search_type: text
|
||||
base_url: https://yacy.searchlab.eu
|
||||
shortcut: ya
|
||||
base_url:
|
||||
- https://yacy.searchlab.eu
|
||||
- https://search.lomig.me
|
||||
- https://yacy.ecosys.eu
|
||||
- https://search.webproject.link
|
||||
|
||||
- name: yacy images
|
||||
engine: yacy
|
||||
categories: images
|
||||
search_type: image
|
||||
base_url: https://yacy.searchlab.eu
|
||||
shortcut: yai
|
||||
disabled: true
|
||||
|
||||
|
@ -45,6 +51,9 @@ Implementations
|
|||
"""
|
||||
# pylint: disable=fixme
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import random
|
||||
from json import loads
|
||||
from urllib.parse import urlencode
|
||||
from dateutil import parser
|
||||
|
@ -87,15 +96,10 @@ search_type = 'text'
|
|||
``video`` are not yet implemented (Pull-Requests are welcome).
|
||||
"""
|
||||
|
||||
# search-url
|
||||
base_url = 'https://yacy.searchlab.eu'
|
||||
search_url = (
|
||||
'/yacysearch.json?{query}'
|
||||
'&startRecord={offset}'
|
||||
'&maximumRecords={limit}'
|
||||
'&contentdom={search_type}'
|
||||
'&resource={resource}'
|
||||
)
|
||||
base_url: list | str = 'https://yacy.searchlab.eu'
|
||||
"""The value is an URL or a list of URLs. In the latter case instance will be
|
||||
selected randomly.
|
||||
"""
|
||||
|
||||
|
||||
def init(_):
|
||||
|
@ -108,23 +112,34 @@ def init(_):
|
|||
raise ValueError('search_type "%s" is not one of %s' % (search_type, valid_types))
|
||||
|
||||
|
||||
def _base_url() -> str:
    """Return the configured YaCy instance URL; when a list of URLs is
    configured, one of them is picked at random."""
    from searx.engines import engines  # pylint: disable=import-outside-toplevel

    url = engines['yacy'].base_url  # type: ignore
    if isinstance(url, list):
        return random.choice(url)
    return url
|
||||
|
||||
|
||||
def request(query, params):
|
||||
|
||||
offset = (params['pageno'] - 1) * number_of_results
|
||||
|
||||
params['url'] = base_url + search_url.format(
|
||||
query=urlencode({'query': query}),
|
||||
offset=offset,
|
||||
limit=number_of_results,
|
||||
search_type=search_type,
|
||||
resource=search_mode,
|
||||
)
|
||||
|
||||
if http_digest_auth_user and http_digest_auth_pass:
|
||||
params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)
|
||||
args = {
|
||||
'query': query,
|
||||
'startRecord': offset,
|
||||
'maximumRecords': number_of_results,
|
||||
'contentdom': search_type,
|
||||
'resource': search_mode,
|
||||
}
|
||||
|
||||
# add language tag if specified
|
||||
if params['language'] != 'all':
|
||||
params['url'] += '&lr=lang_' + params['language'].split('-')[0]
|
||||
args['lr'] = 'lang_' + params['language'].split('-')[0]
|
||||
|
||||
params["url"] = f"{_base_url()}/yacysearch.json?{urlencode(args)}"
|
||||
|
||||
if http_digest_auth_user and http_digest_auth_pass:
|
||||
params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)
|
||||
|
||||
return params
|
||||
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Calculate mathematical expressions using ack#eval
|
||||
"""
|
||||
|
||||
import ast
|
||||
import operator
|
||||
|
||||
from flask_babel import gettext
|
||||
from searx import settings
|
||||
|
||||
name = "Basic Calculator"
|
||||
description = gettext("Calculate mathematical expressions via the search bar")
|
||||
default_on = False
|
||||
|
||||
preference_section = 'general'
|
||||
plugin_id = 'calculator'
|
||||
|
||||
operators = {
|
||||
ast.Add: operator.add,
|
||||
ast.Sub: operator.sub,
|
||||
ast.Mult: operator.mul,
|
||||
ast.Div: operator.truediv,
|
||||
ast.Pow: operator.pow,
|
||||
ast.BitXor: operator.xor,
|
||||
ast.USub: operator.neg,
|
||||
}
|
||||
|
||||
|
||||
def _eval_expr(expr):
|
||||
"""
|
||||
>>> _eval_expr('2^6')
|
||||
4
|
||||
>>> _eval_expr('2**6')
|
||||
64
|
||||
>>> _eval_expr('1 + 2*3**(4^5) / (6 + -7)')
|
||||
-5.0
|
||||
"""
|
||||
return _eval(ast.parse(expr, mode='eval').body)
|
||||
|
||||
|
||||
def _eval(node):
|
||||
if isinstance(node, ast.Constant) and isinstance(node.value, int):
|
||||
return node.value
|
||||
|
||||
if isinstance(node, ast.BinOp):
|
||||
return operators[type(node.op)](_eval(node.left), _eval(node.right))
|
||||
|
||||
if isinstance(node, ast.UnaryOp):
|
||||
return operators[type(node.op)](_eval(node.operand))
|
||||
|
||||
raise TypeError(node)
|
||||
|
||||
|
||||
def post_search(_request, search):
    """Answer arithmetic queries with the evaluated result (first page only).

    Always returns ``True`` so that the regular search continues.
    """
    # don't run on public instances due to possible attack surfaces;
    # only answer on the first result page; and, to avoid DoS attacks with
    # long expressions, ignore long queries
    if (
        settings['server']['public_instance']
        or search.search_query.pageno > 1
        or len(search.search_query.query) > 100
    ):
        return True

    # replace commonly used math operators with their proper Python operator
    query = search.search_query.query.replace("x", "*").replace(":", "/")

    # only numbers and math operators are accepted
    if any(c.isalpha() for c in query):
        return True

    # in python, powers are calculated via **
    expression = query.replace("^", "**")
    try:
        result = str(_eval_expr(expression))
    except (TypeError, SyntaxError, ArithmeticError):
        return True

    if result != query:
        search.result_container.answers['calculate'] = {'answer': f"{query} = {result}"}
    return True
|
||||
|
||||
|
||||
def is_allowed():
    """Hide the plugin on public instances (possible attack surface)."""
    public = settings['server']['public_instance']
    return not public
|
|
@ -1,58 +1,245 @@
|
|||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
"""Calculate mathematical expressions using ack#eval
|
||||
"""A plugin for converting measured values from one unit to another unit (a
|
||||
unit converter).
|
||||
|
||||
The plugin looks up the symbols (given in the query term) in a list of
|
||||
converters, each converter is one item in the list (compare
|
||||
:py:obj:`ADDITIONAL_UNITS`). If the symbols are ambiguous, the matching units
|
||||
of measurement are evaluated. The weighting in the evaluation results from the
|
||||
sorting of the :py:obj:`list of unit converters<symbol_to_si>`.
|
||||
|
||||
Enable in ``settings.yml``:
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
enabled_plugins:
|
||||
..
|
||||
- 'Unit converter plugin'
|
||||
|
||||
"""
|
||||
|
||||
from flask_babel import gettext
|
||||
import re
|
||||
import babel.numbers
|
||||
from flask_babel import gettext, get_locale
|
||||
|
||||
from searx import data
|
||||
|
||||
from searx.data import WIKIDATA_UNITS
|
||||
|
||||
name = "Unit converter plugin"
|
||||
description = gettext("Convert between units")
|
||||
default_on = True
|
||||
|
||||
plugin_id = "unit_converter"
|
||||
preference_section = "general"
|
||||
|
||||
CONVERT_KEYWORDS = ["in", "to", "as"]
|
||||
|
||||
|
||||
def _convert(from_value, source_si_factor, target_si_factor):
|
||||
return from_value * source_si_factor / target_si_factor
|
||||
# inspired from https://stackoverflow.com/a/42475086
|
||||
RE_MEASURE = r'''
|
||||
(?P<sign>[-+]?) # +/- or nothing for positive
|
||||
(\s*) # separator: white space or nothing
|
||||
(?P<number>[\d\.,]*) # number: 1,000.00 (en) or 1.000,00 (de)
|
||||
(?P<E>[eE][-+]?\d+)? # scientific notation: e(+/-)2 (*10^2)
|
||||
(\s*) # separator: white space or nothing
|
||||
(?P<unit>\S+) # unit of measure
|
||||
'''
|
||||
|
||||
|
||||
def _parse_text_and_convert(search, splitted_query):
|
||||
if len(splitted_query) != 2 or splitted_query[0].strip() == "" or splitted_query[1].strip() == "":
|
||||
ADDITIONAL_UNITS = [
|
||||
{
|
||||
"si_name": "Q11579",
|
||||
"symbol": "°C",
|
||||
"to_si": lambda val: val + 273.15,
|
||||
"from_si": lambda val: val - 273.15,
|
||||
},
|
||||
{
|
||||
"si_name": "Q11579",
|
||||
"symbol": "°F",
|
||||
"to_si": lambda val: (val + 459.67) * 5 / 9,
|
||||
"from_si": lambda val: (val * 9 / 5) - 459.67,
|
||||
},
|
||||
]
|
||||
"""Additional items to convert from a measure unit to a SI unit (vice versa).
|
||||
|
||||
.. code:: python
|
||||
|
||||
{
|
||||
"si_name": "Q11579", # Wikidata item ID of the SI unit (Kelvin)
|
||||
"symbol": "°C", # symbol of the measure unit
|
||||
"to_si": lambda val: val + 273.15, # convert measure value (val) to SI unit
|
||||
"from_si": lambda val: val - 273.15, # convert SI value (val) measure unit
|
||||
},
|
||||
{
|
||||
"si_name": "Q11573",
|
||||
"symbol": "mi",
|
||||
"to_si": 1609.344, # convert measure value (val) to SI unit
|
||||
"from_si": 1 / 1609.344 # convert SI value (val) measure unit
|
||||
},
|
||||
|
||||
The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
|
||||
or a callable_ (val in / converted value returned).
|
||||
|
||||
.. _callable: https://docs.python.org/3/glossary.html#term-callable
|
||||
"""
|
||||
|
||||
|
||||
ALIAS_SYMBOLS = {
|
||||
'°C': ('C',),
|
||||
'°F': ('F',),
|
||||
'mi': ('L',),
|
||||
}
|
||||
"""Alias symbols for known unit of measure symbols / by example::
|
||||
|
||||
'°C': ('C', ...), # list of alias symbols for °C (Q69362731)
|
||||
'°F': ('F', ...), # list of alias symbols for °F (Q99490479)
|
||||
'mi': ('L',), # list of alias symbols for mi (Q253276)
|
||||
"""
|
||||
|
||||
|
||||
SYMBOL_TO_SI = []
|
||||
|
||||
|
||||
def symbol_to_si():
    """Build (lazily, on first call) the list of known measure units and
    cache it in ``SYMBOL_TO_SI``.  Each entry is a tuple with the fields:

    0. symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)

    1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')

    2. ``from_si`` conversion (factor or callable)

    3. ``to_si`` conversion (factor or callable)

    4. symbol of the origin unit (differs from field 0 only for aliases)

    NOTE(review): for Wikidata-derived entries the raw ``to_si_factor`` is
    stored in the ``from_si`` slot and its reciprocal in the ``to_si`` slot —
    confirm the intended direction against the consumer of this list.

    The list is ordered: units from ``WIKIDATA_UNITS`` first, then the
    :py:obj:`ADDITIONAL_UNITS`, and finally entries generated from
    :py:obj:`ALIAS_SYMBOLS`.  A symbol lookup therefore weights Wikidata
    symbols highest and alias symbols lowest.
    """
    global SYMBOL_TO_SI  # pylint: disable=global-statement
    if SYMBOL_TO_SI:
        return SYMBOL_TO_SI

    # filter out units which can't be normalized to a SI unit and filter out
    # units without a symbol / arcsecond does not have a symbol
    # https://www.wikidata.org/wiki/Q829073
    for unit in data.WIKIDATA_UNITS.values():
        if unit['to_si_factor'] and unit['symbol']:
            SYMBOL_TO_SI.append(
                (
                    unit['symbol'],
                    unit['si_name'],
                    unit['to_si_factor'],  # from_si
                    1 / unit['to_si_factor'],  # to_si
                    unit['symbol'],
                )
            )

    for unit in ADDITIONAL_UNITS:
        SYMBOL_TO_SI.append(
            (
                unit['symbol'],
                unit['si_name'],
                unit['from_si'],
                unit['to_si'],
                unit['symbol'],
            )
        )

    aliases = []
    for entry in SYMBOL_TO_SI:
        for alias in ALIAS_SYMBOLS.get(entry[0], ()):
            aliases.append(
                (
                    alias,
                    entry[1],
                    entry[2],  # from_si
                    entry[3],  # to_si
                    entry[0],  # origin unit
                )
            )
    SYMBOL_TO_SI = SYMBOL_TO_SI + aliases
    return SYMBOL_TO_SI
|
||||
|
||||
|
||||
def _parse_text_and_convert(search, from_query, to_query):
|
||||
|
||||
# pylint: disable=too-many-branches, too-many-locals
|
||||
|
||||
if not (from_query and to_query):
|
||||
return
|
||||
|
||||
from_value = ""
|
||||
from_unit_key = ""
|
||||
|
||||
# only parse digits as value that belong together
|
||||
read_alpha = False
|
||||
for c in splitted_query[0]:
|
||||
if not read_alpha and (c in ("-", ".") or str.isdigit(c)):
|
||||
from_value += c
|
||||
read_alpha = True
|
||||
elif c != " ":
|
||||
from_unit_key += c
|
||||
|
||||
to_unit_key = splitted_query[1].strip()
|
||||
|
||||
from_unit = None
|
||||
to_unit = None
|
||||
|
||||
for unit in WIKIDATA_UNITS.values():
|
||||
if unit['symbol'] == from_unit_key:
|
||||
from_unit = unit
|
||||
|
||||
if unit['symbol'] == to_unit_key:
|
||||
to_unit = unit
|
||||
|
||||
if from_unit and to_unit:
|
||||
break
|
||||
|
||||
if from_unit is None or to_unit is None or to_unit.get('si_name') != from_unit.get('si_name'):
|
||||
measured = re.match(RE_MEASURE, from_query, re.VERBOSE)
|
||||
if not (measured and measured.group('number'), measured.group('unit')):
|
||||
return
|
||||
|
||||
result = _convert(float(from_value), from_unit['to_si_factor'], to_unit['to_si_factor'])
|
||||
search.result_container.answers['conversion'] = {'answer': f"{result:g} {to_unit['symbol']}"}
|
||||
# Symbols are not unique, if there are several hits for the from-unit, then
|
||||
# the correct one must be determined by comparing it with the to-unit
|
||||
# https://github.com/searxng/searxng/pull/3378#issuecomment-2080974863
|
||||
|
||||
# first: collecting possible units
|
||||
|
||||
source_list, target_list = [], []
|
||||
|
||||
for symbol, si_name, from_si, to_si, orig_symbol in symbol_to_si():
|
||||
|
||||
if symbol == measured.group('unit'):
|
||||
source_list.append((si_name, to_si))
|
||||
if symbol == to_query:
|
||||
target_list.append((si_name, from_si, orig_symbol))
|
||||
|
||||
if not (source_list and target_list):
|
||||
return
|
||||
|
||||
source_to_si = target_from_si = target_symbol = None
|
||||
|
||||
# second: find the right unit by comparing list of from-units with list of to-units
|
||||
|
||||
for source in source_list:
|
||||
for target in target_list:
|
||||
if source[0] == target[0]: # compare si_name
|
||||
source_to_si = source[1]
|
||||
target_from_si = target[1]
|
||||
target_symbol = target[2]
|
||||
|
||||
if not (source_to_si and target_from_si):
|
||||
return
|
||||
|
||||
_locale = get_locale() or 'en_US'
|
||||
|
||||
value = measured.group('sign') + measured.group('number') + (measured.group('E') or '')
|
||||
value = babel.numbers.parse_decimal(value, locale=_locale)
|
||||
|
||||
# convert value to SI unit
|
||||
|
||||
if isinstance(source_to_si, (float, int)):
|
||||
value = float(value) * source_to_si
|
||||
else:
|
||||
value = source_to_si(float(value))
|
||||
|
||||
# convert value from SI unit to target unit
|
||||
|
||||
if isinstance(target_from_si, (float, int)):
|
||||
value = float(value) * target_from_si
|
||||
else:
|
||||
value = target_from_si(float(value))
|
||||
|
||||
if measured.group('E'):
|
||||
# when incoming notation is scientific, outgoing notation is scientific
|
||||
result = babel.numbers.format_scientific(value, locale=_locale)
|
||||
else:
|
||||
result = babel.numbers.format_decimal(value, locale=_locale, format='#,##0.##########;-#')
|
||||
|
||||
search.result_container.answers['conversion'] = {'answer': f'{result} {target_symbol}'}
|
||||
|
||||
|
||||
def post_search(_request, search):
|
||||
|
@ -69,8 +256,8 @@ def post_search(_request, search):
|
|||
for query_part in query_parts:
|
||||
for keyword in CONVERT_KEYWORDS:
|
||||
if query_part == keyword:
|
||||
keyword_split = query.split(keyword, 1)
|
||||
_parse_text_and_convert(search, keyword_split)
|
||||
from_query, to_query = query.split(keyword, 1)
|
||||
_parse_text_and_convert(search, from_query.strip(), to_query.strip())
|
||||
return True
|
||||
|
||||
return True
|
||||
|
|
|
@ -3,33 +3,14 @@
|
|||
|
||||
"""
|
||||
|
||||
import unicodedata
|
||||
import re
|
||||
|
||||
from searx.data import CURRENCIES
|
||||
from searx.data import fetch_iso4217_from_user, fetch_name_from_iso4217
|
||||
from .online import OnlineProcessor
|
||||
|
||||
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
|
||||
|
||||
|
||||
def normalize_name(name):
    """Normalize a currency name for lookup: lower-case, hyphens replaced by
    blanks, trailing ``s`` characters stripped, runs of blanks collapsed, and
    the result NFKD-normalized."""
    cleaned = name.lower().replace('-', ' ').rstrip('s')
    cleaned = re.sub(' +', ' ', cleaned)
    return unicodedata.normalize('NFKD', cleaned).lower()
|
||||
|
||||
|
||||
def name_to_iso4217(name):
    """Map a currency name to its ISO 4217 code; fall back to the normalized
    name itself when it is unknown."""
    normalized = normalize_name(name)
    entry = CURRENCIES['names'].get(normalized, [normalized])
    # entries are either a plain code string or a list of candidate codes
    if isinstance(entry, str):
        return entry
    return entry[0]
|
||||
|
||||
|
||||
def iso4217_to_name(iso4217, language):
    """Return the name of currency *iso4217* in *language*; the code itself
    when no localized name is known."""
    names = CURRENCIES['iso4217'].get(iso4217, {})
    return names.get(language, iso4217)
|
||||
|
||||
|
||||
class OnlineCurrencyProcessor(OnlineProcessor):
|
||||
"""Processor class used by ``online_currency`` engines."""
|
||||
|
||||
|
@ -52,14 +33,17 @@ class OnlineCurrencyProcessor(OnlineProcessor):
|
|||
amount = float(amount_str)
|
||||
except ValueError:
|
||||
return None
|
||||
from_currency = name_to_iso4217(from_currency.strip())
|
||||
to_currency = name_to_iso4217(to_currency.strip())
|
||||
from_currency = fetch_iso4217_from_user(from_currency.strip())
|
||||
to_currency = fetch_iso4217_from_user(to_currency.strip())
|
||||
|
||||
if from_currency is None or to_currency is None:
|
||||
return None
|
||||
|
||||
params['amount'] = amount
|
||||
params['from'] = from_currency
|
||||
params['to'] = to_currency
|
||||
params['from_name'] = iso4217_to_name(from_currency, 'en')
|
||||
params['to_name'] = iso4217_to_name(to_currency, 'en')
|
||||
params['from_name'] = fetch_name_from_iso4217(from_currency, 'en')
|
||||
params['to_name'] = fetch_name_from_iso4217(to_currency, 'en')
|
||||
return params
|
||||
|
||||
def get_default_tests(self):
|
||||
|
|
|
@ -220,6 +220,7 @@ outgoing:
|
|||
# - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy
|
||||
# # these plugins are disabled if nothing is configured ..
|
||||
# - 'Hostname replace' # see hostname_replace configuration below
|
||||
# - 'Calculator plugin'
|
||||
# - 'Open Access DOI rewrite'
|
||||
# - 'Tor check plugin'
|
||||
# # Read the docs before activate: auto-detection of the language could be
|
||||
|
@ -2081,7 +2082,11 @@ engines:
|
|||
engine: yacy
|
||||
categories: general
|
||||
search_type: text
|
||||
base_url: https://yacy.searchlab.eu
|
||||
base_url:
|
||||
- https://yacy.searchlab.eu
|
||||
- https://search.lomig.me
|
||||
- https://yacy.ecosys.eu
|
||||
- https://search.webproject.link
|
||||
shortcut: ya
|
||||
disabled: true
|
||||
# required if you aren't using HTTPS for your local yacy instance
|
||||
|
@ -2094,7 +2099,6 @@ engines:
|
|||
engine: yacy
|
||||
categories: images
|
||||
search_type: image
|
||||
base_url: https://yacy.searchlab.eu
|
||||
shortcut: yai
|
||||
disabled: true
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
{%- macro plugin_preferences(section) -%}
|
||||
{%- for plugin in plugins -%}
|
||||
{%- if plugin.preference_section == section -%}
|
||||
{%- if plugin.preference_section == section and (plugin.is_allowed() if plugin.is_allowed else True) -%}
|
||||
<fieldset>{{- '' -}}
|
||||
<legend>{{ _(plugin.name) }}</legend>{{- '' -}}
|
||||
<div class="value">
|
||||
|
|
|
@ -58,7 +58,7 @@ from searx import infopage
|
|||
from searx import limiter
|
||||
from searx.botdetection import link_token
|
||||
|
||||
from searx.data import ENGINE_DESCRIPTIONS
|
||||
from searx.data import fetch_engine_descriptions
|
||||
from searx.results import Timing
|
||||
from searx.settings_defaults import OUTPUT_FORMATS
|
||||
from searx.settings_loader import get_default_settings_path
|
||||
|
@ -1102,17 +1102,10 @@ def image_proxy():
|
|||
@app.route('/engine_descriptions.json', methods=['GET'])
|
||||
def engine_descriptions():
|
||||
locale = get_locale().split('_')[0]
|
||||
result = ENGINE_DESCRIPTIONS['en'].copy()
|
||||
result = fetch_engine_descriptions('en')
|
||||
if locale != 'en':
|
||||
for engine, description in ENGINE_DESCRIPTIONS.get(locale, {}).items():
|
||||
for engine, description in fetch_engine_descriptions(locale).items():
|
||||
result[engine] = description
|
||||
for engine, description in result.items():
|
||||
if len(description) == 2 and description[1] == 'ref':
|
||||
ref_engine, ref_lang = description[0].split(':')
|
||||
description = ENGINE_DESCRIPTIONS[ref_lang][ref_engine]
|
||||
if isinstance(description, str):
|
||||
description = [description, 'wikipedia']
|
||||
result[engine] = description
|
||||
|
||||
# overwrite by about:description (from settings)
|
||||
for engine_name, engine_mod in engines.items():
|
||||
|
|
|
@ -9,15 +9,20 @@ Output file: :origin:`searx/data/currencies.json` (:origin:`CI Update data ...
|
|||
|
||||
# pylint: disable=invalid-name
|
||||
|
||||
import csv
|
||||
import re
|
||||
import unicodedata
|
||||
import json
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
from searx.network import set_timeout_for_thread
|
||||
from searx.locales import LOCALE_NAMES, locales_initialize
|
||||
from searx.engines import wikidata, set_loggers
|
||||
from searx.data import data_dir
|
||||
|
||||
DATA_FILE = data_dir / 'currencies.json'
|
||||
DATABASE_FILE = data_dir / 'currencies.db'
|
||||
CSV_FILE = data_dir / 'dumps' / 'currencies.csv'
|
||||
|
||||
|
||||
set_loggers(wikidata, 'wikidata')
|
||||
locales_initialize()
|
||||
|
@ -75,57 +80,45 @@ def _normalize_name(name):
|
|||
return name
|
||||
|
||||
|
||||
def add_currency_name(db, name, iso4217, normalize_name=True):
|
||||
db_names = db['names']
|
||||
|
||||
def add_entry(db, language, iso4217, name, normalize_name=True):
|
||||
if normalize_name:
|
||||
name = _normalize_name(name)
|
||||
|
||||
iso4217_set = db_names.setdefault(name, [])
|
||||
if iso4217 not in iso4217_set:
|
||||
iso4217_set.insert(0, iso4217)
|
||||
|
||||
|
||||
def add_currency_label(db, label, iso4217, language):
|
||||
labels = db['iso4217'].setdefault(iso4217, {})
|
||||
labels[language] = label
|
||||
entry = (language, iso4217, name)
|
||||
db.add(entry)
|
||||
|
||||
|
||||
def wikidata_request_result_iterator(request):
|
||||
set_timeout_for_thread(60)
|
||||
result = wikidata.send_wikidata_query(request.replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
|
||||
if result is not None:
|
||||
yield from result['results']['bindings']
|
||||
|
||||
|
||||
def fetch_db():
|
||||
db = {
|
||||
'names': {},
|
||||
'iso4217': {},
|
||||
}
|
||||
db = set()
|
||||
|
||||
for r in wikidata_request_result_iterator(SPARQL_WIKIPEDIA_NAMES_REQUEST):
|
||||
iso4217 = r['iso4217']['value']
|
||||
article_name = r['article_name']['value']
|
||||
article_lang = r['article_name']['xml:lang']
|
||||
add_currency_name(db, article_name, iso4217)
|
||||
add_currency_label(db, article_name, iso4217, article_lang)
|
||||
add_entry(db, article_lang, iso4217, article_name)
|
||||
|
||||
for r in wikidata_request_result_iterator(SARQL_REQUEST):
|
||||
iso4217 = r['iso4217']['value']
|
||||
if 'label' in r:
|
||||
label = r['label']['value']
|
||||
label_lang = r['label']['xml:lang']
|
||||
add_currency_name(db, label, iso4217)
|
||||
add_currency_label(db, label, iso4217, label_lang)
|
||||
add_entry(db, label_lang, iso4217, label)
|
||||
|
||||
if 'alias' in r:
|
||||
add_currency_name(db, r['alias']['value'], iso4217)
|
||||
add_entry(db, "", iso4217, r['alias']['value'])
|
||||
|
||||
if 'unicode' in r:
|
||||
add_currency_name(db, r['unicode']['value'], iso4217, normalize_name=False)
|
||||
add_entry(db, "", iso4217, r['unicode']['value'], normalize_name=False)
|
||||
|
||||
if 'unit' in r:
|
||||
add_currency_name(db, r['unit']['value'], iso4217, normalize_name=False)
|
||||
add_entry(db, "", iso4217, r['unit']['value'], normalize_name=False)
|
||||
|
||||
return db
|
||||
|
||||
|
@ -135,22 +128,33 @@ def main():
|
|||
db = fetch_db()
|
||||
|
||||
# static
|
||||
add_currency_name(db, "euro", 'EUR')
|
||||
add_currency_name(db, "euros", 'EUR')
|
||||
add_currency_name(db, "dollar", 'USD')
|
||||
add_currency_name(db, "dollars", 'USD')
|
||||
add_currency_name(db, "peso", 'MXN')
|
||||
add_currency_name(db, "pesos", 'MXN')
|
||||
add_entry(db, "", 'EUR', "euro")
|
||||
add_entry(db, "", 'EUR', "euros")
|
||||
add_entry(db, "", 'USD', "dollar")
|
||||
add_entry(db, "", 'USD', "dollars")
|
||||
add_entry(
|
||||
db,
|
||||
"",
|
||||
'MXN',
|
||||
"peso",
|
||||
)
|
||||
add_entry(db, "", 'MXN', "pesos")
|
||||
|
||||
# reduce memory usage:
|
||||
# replace lists with one item by the item. see
|
||||
# searx.search.processors.online_currency.name_to_iso4217
|
||||
for name in db['names']:
|
||||
if len(db['names'][name]) == 1:
|
||||
db['names'][name] = db['names'][name][0]
|
||||
|
||||
with DATA_FILE.open('w', encoding='utf8') as f:
|
||||
json.dump(db, f, indent=4, sort_keys=True, ensure_ascii=False)
|
||||
db = list(db)
|
||||
db.sort(key=lambda entry: (entry[0], entry[1], entry[2]))
|
||||
Path(DATABASE_FILE).unlink(missing_ok=True)
|
||||
with sqlite3.connect(DATABASE_FILE) as con:
|
||||
cur = con.cursor()
|
||||
cur.execute("CREATE TABLE currencies(language, iso4217, name)")
|
||||
cur.executemany("INSERT INTO currencies VALUES(?, ?, ?)", db)
|
||||
cur.execute("CREATE INDEX index_currencies_iso4217 ON currencies('iso4217')")
|
||||
cur.execute("CREATE INDEX index_currencies_name ON currencies('name')")
|
||||
con.commit()
|
||||
with CSV_FILE.open('w', encoding='utf8') as f:
|
||||
w = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
|
||||
w.writerow(["language", "iso4217", "name"])
|
||||
for row in db:
|
||||
w.writerow(row)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -9,22 +9,24 @@ Output file: :origin:`searx/data/engine_descriptions.json`.
|
|||
|
||||
# pylint: disable=invalid-name, global-statement
|
||||
|
||||
import csv
|
||||
import json
|
||||
import sqlite3
|
||||
from urllib.parse import urlparse
|
||||
from os.path import join
|
||||
from pathlib import Path
|
||||
|
||||
from lxml.html import fromstring
|
||||
|
||||
from searx.engines import wikidata, set_loggers
|
||||
from searx.utils import extract_text, searx_useragent
|
||||
from searx.locales import LOCALE_NAMES, locales_initialize, match_locale
|
||||
from searx import searx_dir
|
||||
from searx.utils import gen_useragent, detect_language
|
||||
import searx.search
|
||||
import searx.network
|
||||
from searx.data import data_dir
|
||||
|
||||
DATA_FILE = data_dir / 'engine_descriptions.json'
|
||||
DATABASE_FILE = data_dir / 'engine_descriptions.db'
|
||||
CSV_FILE = data_dir / 'dumps' / 'engine_descriptions.csv'
|
||||
|
||||
set_loggers(wikidata, 'wikidata')
|
||||
locales_initialize()
|
||||
|
@ -323,37 +325,32 @@ def fetch_website_descriptions():
|
|||
fetch_website_description(engine_name, website)
|
||||
|
||||
|
||||
def get_engine_descriptions_filename():
|
||||
return join(join(searx_dir, "data"), "engine_descriptions.json")
|
||||
|
||||
|
||||
def get_output():
|
||||
def write_db():
|
||||
"""
|
||||
From descriptions[engine][language] = [description, source]
|
||||
To
|
||||
Erase and write the SQLite database searx/data/engine_descriptions.db :
|
||||
* create one table engine_descriptions
|
||||
* dump write all the values
|
||||
|
||||
* output[language][engine] = description_and_source
|
||||
* description_and_source can be:
|
||||
* [description, source]
|
||||
* description (if source = "wikipedia")
|
||||
* [f"engine:lang", "ref"] (reference to another existing description)
|
||||
Make a JSON dump of the values into engine_descriptions.json
|
||||
"""
|
||||
output = {locale: {} for locale in LOCALE_NAMES}
|
||||
|
||||
seen_descriptions = {}
|
||||
|
||||
for engine_name, lang_descriptions in descriptions.items():
|
||||
for language, description in lang_descriptions.items():
|
||||
if description[0] in seen_descriptions:
|
||||
ref = seen_descriptions[description[0]]
|
||||
description = [f'{ref[0]}:{ref[1]}', 'ref']
|
||||
else:
|
||||
seen_descriptions[description[0]] = (engine_name, language)
|
||||
if description[1] == 'wikipedia':
|
||||
description = description[0]
|
||||
output.setdefault(language, {}).setdefault(engine_name, description)
|
||||
|
||||
return output
|
||||
data = [
|
||||
(language, engine_name, description[0], description[1])
|
||||
for engine_name, lang_descriptions in descriptions.items()
|
||||
for language, description in lang_descriptions.items()
|
||||
]
|
||||
data.sort(key=lambda item: (item[0], item[1]))
|
||||
Path(DATABASE_FILE).unlink(missing_ok=True)
|
||||
with sqlite3.connect(DATABASE_FILE) as con:
|
||||
cur = con.cursor()
|
||||
cur.execute("CREATE TABLE engine_descriptions(language, engine, description, source)")
|
||||
cur.executemany("INSERT INTO engine_descriptions VALUES(?, ?, ?, ?)", data)
|
||||
cur.execute("CREATE INDEX index_engine_descriptions ON engine_descriptions('language')")
|
||||
con.commit()
|
||||
with CSV_FILE.open('w', encoding="utf8") as f:
|
||||
w = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
|
||||
w.writerow(["language", "engine", "description", "source"])
|
||||
for row in data:
|
||||
w.writerow(row)
|
||||
|
||||
|
||||
def main():
|
||||
|
@ -361,10 +358,7 @@ def main():
|
|||
fetch_wikidata_descriptions()
|
||||
fetch_wikipedia_descriptions()
|
||||
fetch_website_descriptions()
|
||||
|
||||
output = get_output()
|
||||
with DATA_FILE.open('w', encoding='utf8') as f:
|
||||
f.write(json.dumps(output, indent=1, separators=(',', ':'), sort_keys=True, ensure_ascii=False))
|
||||
write_db()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -42,8 +42,9 @@ Output file: :origin:`searx/data/osm_keys_tags` (:origin:`CI Update data ...
|
|||
|
||||
"""
|
||||
|
||||
import json
|
||||
import collections
|
||||
import csv
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
from searx.network import set_timeout_for_thread
|
||||
from searx.engines import wikidata, set_loggers
|
||||
|
@ -51,7 +52,9 @@ from searx.sxng_locales import sxng_locales
|
|||
from searx.engines.openstreetmap import get_key_rank, VALUE_TO_LINK
|
||||
from searx.data import data_dir
|
||||
|
||||
DATA_FILE = data_dir / 'osm_keys_tags.json'
|
||||
DATABASE_FILE = data_dir / 'osm_keys_tags.db'
|
||||
CSV_KEYS_FILE = data_dir / 'dumps' / 'osm_keys.csv'
|
||||
CSV_TAGS_FILE = data_dir / 'dumps' / 'osm_tags.csv'
|
||||
|
||||
set_loggers(wikidata, 'wikidata')
|
||||
|
||||
|
@ -78,42 +81,39 @@ ORDER BY ?key ?item ?itemLabel
|
|||
|
||||
LANGUAGES = [l[0].lower() for l in sxng_locales]
|
||||
|
||||
PRESET_KEYS = {
|
||||
('wikidata',): {'en': 'Wikidata'},
|
||||
('wikipedia',): {'en': 'Wikipedia'},
|
||||
('email',): {'en': 'Email'},
|
||||
('facebook',): {'en': 'Facebook'},
|
||||
('fax',): {'en': 'Fax'},
|
||||
('internet_access', 'ssid'): {'en': 'Wi-Fi'},
|
||||
}
|
||||
PRESET_KEYS = [
|
||||
["wikidata", "en", "Wikidata"],
|
||||
["wikipedia", "en", "Wikipedia"],
|
||||
["email", "en", "email"],
|
||||
["facebook", "en", "facebook"],
|
||||
["fax", "en", "Fax"],
|
||||
["internet_access:ssid", "en", "Wi-Fi"],
|
||||
]
|
||||
|
||||
INCLUDED_KEYS = {('addr',)}
|
||||
|
||||
|
||||
def get_preset_keys():
|
||||
results = collections.OrderedDict()
|
||||
for keys, value in PRESET_KEYS.items():
|
||||
r = results
|
||||
for k in keys:
|
||||
r = r.setdefault(k, {})
|
||||
r.setdefault('*', value)
|
||||
return results
|
||||
|
||||
|
||||
def get_keys():
|
||||
results = get_preset_keys()
|
||||
result_keys = set()
|
||||
results = PRESET_KEYS.copy()
|
||||
response = wikidata.send_wikidata_query(SPARQL_KEYS_REQUEST)
|
||||
|
||||
for key in response['results']['bindings']:
|
||||
keys = key['key']['value'].split(':')[1:]
|
||||
label = key['itemLabel']['value'].lower()
|
||||
lang = key['itemLabel']['xml:lang']
|
||||
|
||||
if lang not in LANGUAGES:
|
||||
continue
|
||||
|
||||
if keys[0] == 'currency' and len(keys) > 1:
|
||||
# special case in openstreetmap.py
|
||||
continue
|
||||
if keys[0] == 'contact' and len(keys) > 1:
|
||||
# label for the key "contact.email" is "Email"
|
||||
# whatever the language
|
||||
r = results.setdefault('contact', {})
|
||||
r[keys[1]] = {'*': {'en': keys[1]}}
|
||||
if lang == "en":
|
||||
# label for the key "contact.email" is "Email"
|
||||
# whatever the language
|
||||
results.append((":".join(keys), "en", keys[1]))
|
||||
continue
|
||||
if tuple(keys) in PRESET_KEYS:
|
||||
# skip presets (already set above)
|
||||
|
@ -125,40 +125,46 @@ def get_keys():
|
|||
):
|
||||
# keep only keys that will be displayed by openstreetmap.py
|
||||
continue
|
||||
label = key['itemLabel']['value'].lower()
|
||||
lang = key['itemLabel']['xml:lang']
|
||||
r = results
|
||||
for k in keys:
|
||||
r = r.setdefault(k, {})
|
||||
r = r.setdefault('*', {})
|
||||
if lang in LANGUAGES:
|
||||
r.setdefault(lang, label)
|
||||
|
||||
entry = (":".join(keys), lang, label)
|
||||
entry_key = (entry[0], entry[1])
|
||||
if entry_key not in result_keys:
|
||||
results.append(entry)
|
||||
result_keys.add(entry_key)
|
||||
|
||||
# special cases
|
||||
results['delivery']['covid19']['*'].clear()
|
||||
for k, v in results['delivery']['*'].items():
|
||||
results['delivery']['covid19']['*'][k] = v + ' (COVID19)'
|
||||
results = [entry for entry in results if entry[0] != 'delivery:covid19']
|
||||
results.extend(
|
||||
[['delivery:covid19', entry[1], entry[2] + ' (COVID19)'] for entry in results if entry[0] == 'delivery']
|
||||
)
|
||||
|
||||
results['opening_hours']['covid19']['*'].clear()
|
||||
for k, v in results['opening_hours']['*'].items():
|
||||
results['opening_hours']['covid19']['*'][k] = v + ' (COVID19)'
|
||||
results = [entry for entry in results if entry[0] != 'opening_hours:covid19']
|
||||
results.extend(
|
||||
[
|
||||
['opening_hours:covid19', entry[1], entry[2] + ' (COVID19)']
|
||||
for entry in results
|
||||
if entry[0] == 'opening_hours'
|
||||
]
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def get_tags():
|
||||
results = collections.OrderedDict()
|
||||
results = []
|
||||
response = wikidata.send_wikidata_query(SPARQL_TAGS_REQUEST)
|
||||
for tag in response['results']['bindings']:
|
||||
tag_names = tag['tag']['value'].split(':')[1].split('=')
|
||||
if len(tag_names) == 2:
|
||||
tag_category, tag_type = tag_names
|
||||
else:
|
||||
tag_category, tag_type = tag_names[0], ''
|
||||
try:
|
||||
tag_key, tag_value = tag['tag']['value'].split('=')
|
||||
if tag_key.startswith("Tag:"):
|
||||
tag_key = tag_key[4:]
|
||||
except ValueError:
|
||||
print("ignore tag", tag['tag']['value'])
|
||||
continue
|
||||
label = tag['itemLabel']['value'].lower()
|
||||
lang = tag['itemLabel']['xml:lang']
|
||||
if lang in LANGUAGES:
|
||||
results.setdefault(tag_category, {}).setdefault(tag_type, {}).setdefault(lang, label)
|
||||
results.append((tag_key, tag_value, lang, label))
|
||||
return results
|
||||
|
||||
|
||||
|
@ -206,9 +212,30 @@ def optimize_keys(data):
|
|||
if __name__ == '__main__':
|
||||
|
||||
set_timeout_for_thread(60)
|
||||
result = {
|
||||
'keys': optimize_keys(get_keys()),
|
||||
'tags': optimize_tags(get_tags()),
|
||||
}
|
||||
with DATA_FILE.open('w', encoding="utf8") as f:
|
||||
json.dump(result, f, indent=4, sort_keys=True, ensure_ascii=False)
|
||||
osm_keys = get_keys()
|
||||
osm_tags = get_tags()
|
||||
|
||||
osm_keys.sort(key=lambda item: (item[0], item[1]))
|
||||
osm_tags.sort(key=lambda item: (item[0], item[1]))
|
||||
|
||||
Path(DATABASE_FILE).unlink(missing_ok=True)
|
||||
with sqlite3.connect(DATABASE_FILE) as con:
|
||||
cur = con.cursor()
|
||||
cur.execute("CREATE TABLE osm_keys(name, language, label)")
|
||||
cur.executemany("INSERT INTO osm_keys VALUES(?, ?, ?)", osm_keys)
|
||||
cur.execute("CREATE INDEX index_osm_keys ON osm_keys('name', 'language')")
|
||||
cur.execute("CREATE TABLE osm_tags(tag_key, tag_value, language, label)")
|
||||
cur.executemany("INSERT INTO osm_tags VALUES(?, ?, ?, ?)", osm_tags)
|
||||
cur.execute("CREATE INDEX index_osm_tags ON osm_tags('tag_key', 'tag_value', 'language')")
|
||||
con.commit()
|
||||
|
||||
with CSV_KEYS_FILE.open('w', encoding="utf8") as f:
|
||||
w = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
|
||||
w.writerow(["name", "language", "label"])
|
||||
for row in osm_keys:
|
||||
w.writerow(row)
|
||||
with CSV_TAGS_FILE.open('w', encoding="utf8") as f:
|
||||
w = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
|
||||
w.writerow(["tag_key", "tag_value", "language", "label"])
|
||||
for row in osm_tags:
|
||||
w.writerow(row)
|
||||
|
|
1
setup.py
1
setup.py
|
@ -61,6 +61,7 @@ setup(
|
|||
'data/*.json',
|
||||
'data/*.txt',
|
||||
'data/*.ftz',
|
||||
'data/*.db',
|
||||
'infopage/*/*',
|
||||
'static/themes/simple/css/*',
|
||||
'static/themes/simple/css/*/*',
|
||||
|
|
|
@ -89,10 +89,17 @@ test.robot() {
|
|||
dump_return $?
|
||||
}
|
||||
|
||||
|
||||
test.rst() {
|
||||
build_msg TEST "[reST markup] ${RST_FILES[*]}"
|
||||
|
||||
local rst2html=rst2html
|
||||
if [ "3.8" == "$(python -c 'import sys; print(".".join([str(x) for x in sys.version_info[:2]]))')" ]; then
|
||||
rst2html=rst2html.py
|
||||
fi
|
||||
|
||||
for rst in "${RST_FILES[@]}"; do
|
||||
pyenv.cmd rst2html.py --halt error "$rst" > /dev/null || die 42 "fix issue in $rst"
|
||||
pyenv.cmd "${rst2html}" --halt error "$rst" > /dev/null || die 42 "fix issue in $rst"
|
||||
done
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue