From fd1422a67017443a0dc0773562ec98525b468bde Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sat, 30 Sep 2023 18:41:13 +0200 Subject: [PATCH] [mod] engine - simplify region & lang handling, make filters configurable Signed-off-by: Markus Heiser --- docs/dev/engines/online/radio_browser.rst | 13 + searx/data/engine_traits.json | 337 ++++++++++++++++++++++ searx/engines/radio_browser.py | 141 ++++++--- 3 files changed, 453 insertions(+), 38 deletions(-) create mode 100644 docs/dev/engines/online/radio_browser.rst diff --git a/docs/dev/engines/online/radio_browser.rst b/docs/dev/engines/online/radio_browser.rst new file mode 100644 index 000000000..a150e59c5 --- /dev/null +++ b/docs/dev/engines/online/radio_browser.rst @@ -0,0 +1,13 @@ +.. _RadioBrowser engine: + +============ +RadioBrowser +============ + +.. contents:: + :depth: 2 + :local: + :backlinks: entry + +.. automodule:: searx.engines.radio_browser + :members: diff --git a/searx/data/engine_traits.json b/searx/data/engine_traits.json index e13538aa1..aee199b30 100644 --- a/searx/data/engine_traits.json +++ b/searx/data/engine_traits.json @@ -4932,6 +4932,343 @@ "zh-HK": "zh_HK" } }, + "radio browser": { + "all_locale": null, + "custom": { + "countrycodes": [ + "AD", + "AE", + "AF", + "AG", + "AL", + "AM", + "AO", + "AQ", + "AR", + "AS", + "AT", + "AU", + "AW", + "AZ", + "BA", + "BB", + "BD", + "BE", + "BF", + "BG", + "BH", + "BI", + "BJ", + "BM", + "BN", + "BO", + "BQ", + "BR", + "BS", + "BT", + "BW", + "BY", + "BZ", + "CA", + "CC", + "CD", + "CF", + "CH", + "CI", + "CK", + "CL", + "CM", + "CN", + "CO", + "CR", + "CU", + "CV", + "CW", + "CY", + "CZ", + "DE", + "DK", + "DM", + "DO", + "DZ", + "EC", + "EE", + "EG", + "ES", + "ET", + "FI", + "FJ", + "FK", + "FO", + "FR", + "GA", + "GB", + "GD", + "GE", + "GF", + "GG", + "GH", + "GI", + "GL", + "GN", + "GP", + "GQ", + "GR", + "GS", + "GT", + "GU", + "GW", + "GY", + "HK", + "HN", + "HR", + "HT", + "HU", + "ID", + "IE", + "IL", + "IM", + "IN", + "IO", + "IQ", 
+ "IR", + "IS", + "IT", + "JM", + "JO", + "JP", + "KE", + "KG", + "KH", + "KM", + "KN", + "KP", + "KR", + "KW", + "KY", + "KZ", + "LB", + "LC", + "LK", + "LT", + "LU", + "LV", + "LY", + "MA", + "MC", + "MD", + "ME", + "MG", + "MK", + "ML", + "MM", + "MN", + "MO", + "MQ", + "MT", + "MU", + "MW", + "MX", + "MY", + "MZ", + "NA", + "NC", + "NE", + "NF", + "NG", + "NI", + "NL", + "NO", + "NP", + "NZ", + "OM", + "PA", + "PE", + "PF", + "PH", + "PK", + "PL", + "PM", + "PR", + "PS", + "PT", + "PY", + "QA", + "RE", + "RO", + "RS", + "RU", + "RW", + "SA", + "SC", + "SD", + "SE", + "SG", + "SH", + "SI", + "SJ", + "SK", + "SL", + "SM", + "SN", + "SO", + "SR", + "ST", + "SV", + "SY", + "SZ", + "TC", + "TD", + "TF", + "TG", + "TH", + "TJ", + "TM", + "TN", + "TO", + "TR", + "TT", + "TW", + "TZ", + "UA", + "UG", + "UM", + "US", + "UY", + "UZ", + "VA", + "VC", + "VE", + "VG", + "VI", + "VN", + "VU", + "WF", + "XK", + "YE", + "YT", + "ZA", + "ZM", + "ZW" + ] + }, + "data_type": "traits_v1", + "languages": { + "af": "afrikaans", + "ak": "akan", + "am": "amharic", + "ar": "arabic", + "ast": "asturian", + "az": "azerbaijani", + "be": "belarusian", + "bg": "bulgarian", + "bm": "bambara", + "bn": "bengali", + "bo": "tibetan", + "br": "breton", + "bs": "bosnian", + "ca": "catalan", + "cs": "czech", + "cv": "chuvash", + "cy": "welsh", + "da": "danish", + "de": "german", + "dsb": "lower sorbian", + "dz": "dzongkha", + "el": "greek", + "en": "english", + "eo": "esperanto", + "es": "spanish", + "et": "estonian", + "eu": "basque", + "fa": "persian", + "fi": "finnish", + "fil": "tagalog", + "fo": "faroese", + "fr": "french", + "ga": "irish", + "gd": "gaelic", + "gl": "galician", + "gsw": "swiss german", + "gu": "gujarati", + "gv": "manx", + "ha": "hausa", + "he": "hebrew", + "hi": "hindi", + "hr": "croatian", + "hsb": "upper sorbian", + "hu": "hungarian", + "hy": "armenian", + "id": "indonesian", + "is": "icelandic", + "it": "italian", + "ja": "japanese", + "jv": "javanese", + "ka": "georgian", 
+ "kk": "kazakh", + "kl": "kalaallisut", + "km": "khmer", + "kn": "kannada", + "ko": "korean", + "ku": "kurdish", + "lb": "luxembourgish", + "ln": "lingala", + "lt": "lithuanian", + "lv": "latvian", + "mg": "malagasy", + "mk": "macedonian", + "ml": "malayalam", + "mn": "mongolian", + "mr": "marathi", + "ms": "malay", + "mt": "maltese", + "my": "burmese", + "nds": "low german", + "ne": "nepali", + "nl": "dutch", + "no": "norwegian", + "oc": "occitan", + "om": "oromo", + "os": "ossetian", + "pa": "panjabi", + "pl": "polish", + "pt": "portuguese", + "qu": "quechua", + "rm": "romansh", + "ro": "romanian", + "ru": "russian", + "rw": "kinyarwanda", + "sa": "sanskrit", + "sc": "sardinian", + "sd": "sindhi", + "si": "sinhala", + "sk": "slovak", + "sl": "slovenian", + "so": "somali", + "sq": "albanian", + "sr": "serbian", + "sv": "swedish", + "sw": "swahili", + "ta": "tamil", + "te": "telugu", + "tg": "tajik", + "th": "thai", + "tk": "turkmen", + "tr": "turkish", + "tt": "tatar", + "uk": "ukrainian", + "ur": "urdu", + "uz": "uzbek", + "vi": "vietnamese", + "wo": "wolof", + "xh": "xhosa", + "yi": "yiddish", + "yue": "cantonese", + "zh": "chinese", + "zh_Hans": "mandarin" + }, + "regions": {} + }, "sepiasearch": { "all_locale": null, "custom": {}, diff --git a/searx/engines/radio_browser.py b/searx/engines/radio_browser.py index 758ba1b3d..6b60b398e 100644 --- a/searx/engines/radio_browser.py +++ b/searx/engines/radio_browser.py @@ -1,30 +1,57 @@ # SPDX-License-Identifier: AGPL-3.0-or-later # lint: pylint -"""Radio browser (music) +"""Search radio stations from RadioBrowser by `Advanced station search API`_. + +.. 
_Advanced station search API: + https://de1.api.radio-browser.info/#Advanced_station_search + """ from urllib.parse import urlencode import babel +from flask_babel import gettext from searx.network import get from searx.enginelib.traits import EngineTraits -from searx.locales import language_tag, region_tag +from searx.locales import language_tag traits: EngineTraits about = { "website": 'https://www.radio-browser.info/', + "wikidata_id": 'Q111664849', "official_api_documentation": 'https://de1.api.radio-browser.info/', "use_official_api": True, "require_api_key": False, "results": 'JSON', } paging = True -categories = ['music'] +categories = ['music', 'radio'] base_url = "https://de1.api.radio-browser.info" # see https://api.radio-browser.info/ for all nodes number_of_results = 10 +station_filters = [] # ['countrycode', 'language'] +"""A list of filters to be applied to the search of radio stations. By default +no filters are applied. Valid filters are: + +``language`` + Filter stations by selected language. For instance the ``de`` from ``:de-AU`` + will be translated to `german` and used in the argument ``language=``. + +``countrycode`` + Filter stations by selected country. The 2-letter countrycode of the station + comes from the region the user selected. For instance ``:de-AU`` will filter + out all stations not in ``AU``. + +.. note:: + + RadioBrowser has registered a lot of languages and countrycodes unknown to + :py:obj:`babel`. Also note that when searching for radio stations, users are + more likely to search by name than by region or language. 
+ +""" + def request(query, params): args = { @@ -35,13 +62,17 @@ def request(query, params): 'hidebroken': 'true', 'reverse': 'true', } - lang = traits.get_language(params['searxng_locale'], None) - if lang is not None: - args['language'] = lang - region = traits.get_region(params['searxng_locale'], None) - if region is not None: - args['countrycode'] = region.split('-')[1] + if 'language' in station_filters: + lang = traits.get_language(params['searxng_locale']) # type: ignore + if lang: + args['language'] = lang + + if 'countrycode' in station_filters: + if len(params['searxng_locale'].split('-')) > 1: + countrycode = params['searxng_locale'].split('-')[-1].upper() + if countrycode in traits.custom['countrycodes']: # type: ignore + args['countrycode'] = countrycode params['url'] = f"{base_url}/json/stations/search?{urlencode(args)}" return params @@ -50,22 +81,43 @@ def request(query, params): def response(resp): results = [] - for result in resp.json(): + json_resp = resp.json() + + for result in json_resp: url = result['homepage'] if not url: url = result['url_resolved'] + content = [] + tags = ', '.join(result.get('tags', '').split(',')) + if tags: + content.append(tags) + for x in ['state', 'country']: + v = result.get(x) + if v: + v = str(v).strip() + content.append(v) + + metadata = [] + codec = result.get('codec') + if codec and codec.lower() != 'unknown': + metadata.append(f'{codec} ' + gettext('radio')) + for x, y in [ + (gettext('bitrate'), 'bitrate'), + (gettext('votes'), 'votes'), + (gettext('clicks'), 'clickcount'), + ]: + v = result.get(y) + if v: + v = str(v).strip() + metadata.append(f"{x} {v}") results.append( { - 'template': 'videos.html', 'url': url, 'title': result['name'], - 'thumbnail': result.get('favicon', '').replace("http://", "https://"), - 'content': result['country'] - + " / " - + result["tags"] - + f" / {result['votes']} votes" - + f" / {result['clickcount']} clicks", + 'img_src': result.get('favicon', '').replace("http://", 
"https://"), + 'content': ' | '.join(content), + 'metadata': ' | '.join(metadata), 'iframe_src': result['url_resolved'].replace("http://", "https://"), } ) @@ -74,38 +126,51 @@ def response(resp): def fetch_traits(engine_traits: EngineTraits): - language_list = get(f'{base_url}/json/languages').json() + """Fetch languages and countrycodes from RadioBrowser - country_list = get(f'{base_url}/json/countrycodes').json() + - ``traits.languages``: `list of languages API`_ + - ``traits.custom['countrycodes']``: `list of countries API`_ + + .. _list of countries API: https://de1.api.radio-browser.info/#List_of_countries + .. _list of languages API: https://de1.api.radio-browser.info/#List_of_languages + """ + # pylint: disable=import-outside-toplevel + + from babel.core import get_global + + babel_reg_list = get_global("territory_languages").keys() + + language_list = get(f'{base_url}/json/languages').json() # type: ignore + country_list = get(f'{base_url}/json/countries').json() # type: ignore for lang in language_list: - # the language doesn't have any iso code, and hence can't be parsed - if not lang['iso_639']: + babel_lang = lang.get('iso_639') + if not babel_lang: + # the language doesn't have any iso code, and hence can't be parsed + # print(f"ERROR: lang - no iso code in {lang}") continue - try: - lang_tag = lang['iso_639'] - sxng_tag = language_tag(babel.Locale.parse(lang_tag, sep="-")) + sxng_tag = language_tag(babel.Locale.parse(babel_lang, sep="-")) except babel.UnknownLocaleError: - print("ERROR: %s is unknown by babel" % lang_tag) + # print(f"ERROR: language tag {babel_lang} is unknown by babel") continue + eng_tag = lang['name'] conflict = engine_traits.languages.get(sxng_tag) if conflict: + if conflict != eng_tag: + print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) continue + engine_traits.languages[sxng_tag] = eng_tag - engine_traits.languages[sxng_tag] = lang['name'] + countrycodes = set() + for region in country_list: + if 
region['iso_3166_1'] not in babel_reg_list: + print(f"ERROR: region tag {region['iso_3166_1']} is unknown by babel") + continue + countrycodes.add(region['iso_3166_1']) - for region in country_list: - try: - reg_tag = f"{lang['iso_639']}-{region['name']}" - sxng_tag = region_tag(babel.Locale.parse(reg_tag, sep="-")) - except babel.UnknownLocaleError: - continue - - conflict = engine_traits.regions.get(sxng_tag) - if conflict: - continue - - engine_traits.regions[sxng_tag] = reg_tag + countrycodes = list(countrycodes) + countrycodes.sort() + engine_traits.custom['countrycodes'] = countrycodes