From 74c7aee9ec52e6b954e48817501a334f23a40e25 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Sat, 2 Apr 2022 15:21:58 +0200 Subject: [PATCH] jisho : code refactoring --- searx/engines/jisho.py | 143 ++++++++++++++++++++++------------------- 1 file changed, 76 insertions(+), 67 deletions(-) diff --git a/searx/engines/jisho.py b/searx/engines/jisho.py index a34d8e421..87bbe983d 100644 --- a/searx/engines/jisho.py +++ b/searx/engines/jisho.py @@ -17,7 +17,6 @@ about = { } categories = ['dictionaries'] -engine_type = 'online_dictionary' paging = False URL = 'https://jisho.org' @@ -34,19 +33,19 @@ def request(query, params): def response(resp): results = [] - infoboxed = False + first_result = True search_results = resp.json() - pages = search_results.get('data', []) - for page in pages: + for page in search_results.get('data', []): # Entries that are purely from Wikipedia are excluded. - if page['senses'][0]['parts_of_speech'] != [] and page['senses'][0]['parts_of_speech'][0] == 'Wikipedia definition': + parts_of_speech = page.get('senses') and page['senses'][0].get('parts_of_speech') + if parts_of_speech and parts_of_speech[0] == 'Wikipedia definition': pass + # Process alternative forms - japanese = page['japanese'] alt_forms = [] - for title_raw in japanese: + for title_raw in page['japanese']: if 'word' not in title_raw: alt_forms.append(title_raw['reading']) else: @@ -54,74 +53,84 @@ def response(resp): if 'reading' in title_raw: title += ' (' + title_raw['reading'] + ')' alt_forms.append(title) - # Process definitions - definitions = [] - def_raw = page['senses'] - for defn_raw in def_raw: - extra = '' - if not infoboxed: - # Extra data. Since they're not documented, this implementation is based solely by the author's assumptions. - if defn_raw['tags'] != []: - if defn_raw['info'] != []: - extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ' # "usually written as kana: " - else: - extra += ', '.join(defn_raw['tags']) + '. ' # abbreviation, archaism, etc. - elif defn_raw['info'] != []: - extra += ', '.join(defn_raw['info']).capitalize() + '. ' # inconsistent - if defn_raw['restrictions'] != []: - extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. ' - extra = extra[:-1] - definitions.append(( - ', '.join(defn_raw['parts_of_speech']), - '; '.join(defn_raw['english_definitions']), - extra - )) - content = '' - infobox_content = ''' - JMdict - and JMnedict - by EDRDG, CC BY-SA 3.0.Wikipedia, CC BY-SA 3.0.' + # + result_url = urljoin(BASE_URL, page['slug']) + definitions = get_definitions(page) + # For results, we'll return the URL, all alternative forms (as title), # and all definitions (as description) truncated to 300 characters. + content = " ".join(f"{engdef}." for _, engdef, _ in definitions) results.append({ - 'url': urljoin(BASE_URL, page['slug']), + 'url': result_url, 'title': ", ".join(alt_forms), 'content': content[:300] + (content[300:] and '...') }) # Like Wordnik, we'll return the first result in an infobox too. - if not infoboxed: - infoboxed = True - infobox_urls = [] - infobox_urls.append({ - 'title': 'Jisho.org', - 'url': urljoin(BASE_URL, page['slug']) - }) - infobox = { - 'infobox': alt_forms[0], - 'urls': infobox_urls - } - alt_forms.pop(0) - alt_content = '' - if len(alt_forms) > 0: - alt_content = '

Other forms: ' - alt_content += ", ".join(alt_forms) - alt_content += '

' - infobox['content'] = alt_content + infobox_content - results.append(infobox) + if first_result: + first_result = False + results.append(get_infobox(alt_forms, result_url, definitions)) return results + + +def get_definitions(page): + # Process definitions + definitions = [] + for defn_raw in page['senses']: + extra = [] + # Extra data. Since they're not documented, this implementation is based solely by the author's assumptions. + if defn_raw.get('tags'): + if defn_raw.get('info'): + # "usually written as kana: " + extra.append(defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ') + else: + # abbreviation, archaism, etc. + extra.append(', '.join(defn_raw['tags']) + '. ') + elif defn_raw.get('info'): + # inconsistent + extra.append(', '.join(defn_raw['info']).capitalize() + '. ') + if defn_raw.get('restrictions'): + extra.append('Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. ') + definitions.append(( + ', '.join(defn_raw['parts_of_speech']), + '; '.join(defn_raw['english_definitions']), + ''.join(extra)[:-1], + )) + return definitions + + +def get_infobox(alt_forms, result_url, definitions): + infobox_content = [] + # title & alt_forms + infobox_title = alt_forms[0] + if len(alt_forms) > 1: + infobox_content.append(f'

Other forms: {", ".join(alt_forms[1:])}

') + + # definitions + infobox_content.append(''' + JMdict + and JMnedict + by EDRDG, CC BY-SA 3.0. +
    + ''') + for pos, engdef, extra in definitions: + if pos == 'Wikipedia definition': + infobox_content.append('
Wikipedia, CC BY-SA 3.0.
    ') + pos = f'{pos}: ' if pos else '' + extra = f' ({extra})' if extra else '' + infobox_content.append(f'
  • {pos}{engdef}{extra}
  • ') + infobox_content.append('
') + + # + return { + 'infobox': infobox_title, + 'content': ''.join(infobox_content), + 'urls': [ + { + 'title': 'Jisho.org', + 'url': result_url, + } + ] + }