jisho : code refactoring

2024-11-19 02:40:11 +01:00 · 2022-04-02 15:21:58 +02:00 · 2022-04-02 15:21:58 +02:00 · 74c7aee9ec
commit 74c7aee9ec
parent 19fa0095a0
1 changed files with 76 additions and 67 deletions
--- a/searx/engines/jisho.py
+++ b/searx/engines/jisho.py
@ -17,7 +17,6 @@ about = {
 }
 categories = ['dictionaries']
 engine_type = 'online_dictionary'
 paging = False
 URL = 'https://jisho.org'
@ -34,19 +33,19 @@ def request(query, params):
 def response(resp):
    results = []
-    infoboxed = False
+    first_result = True
    search_results = resp.json()
    pages = search_results.get('data', [])
-    for page in pages:
+    for page in search_results.get('data', []):
        # Entries that are purely from Wikipedia are excluded.
-        if page['senses'][0]['parts_of_speech'] != [] and page['senses'][0]['parts_of_speech'][0] == 'Wikipedia definition':
+        parts_of_speech = page.get('senses') and page['senses'][0].get('parts_of_speech')
        if parts_of_speech and parts_of_speech[0] == 'Wikipedia definition':
            pass
        # Process alternative forms
        japanese = page['japanese']
        alt_forms = []
-        for title_raw in japanese:
+        for title_raw in page['japanese']:
            if 'word' not in title_raw:
                alt_forms.append(title_raw['reading'])
            else:
@ -54,74 +53,84 @@ def response(resp):
                if 'reading' in title_raw:
                    title += ' (' + title_raw['reading'] + ')'
                alt_forms.append(title)
-        # Process definitions
+        
-        definitions = []
+        #
-        def_raw = page['senses']
+        result_url = urljoin(BASE_URL, page['slug'])
-        for defn_raw in def_raw:
+        definitions = get_definitions(page)
            extra = ''
            if not infoboxed:
                # Extra data. Since they're not documented, this implementation is based solely by the author's assumptions.
                if defn_raw['tags'] != []:
                    if defn_raw['info'] != []:
                        extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ' # "usually written as kana: <kana>"
                    else:
                        extra += ', '.join(defn_raw['tags']) + '. ' # abbreviation, archaism, etc.
                elif defn_raw['info'] != []:
                    extra += ', '.join(defn_raw['info']).capitalize() + '. ' # inconsistent
                if defn_raw['restrictions'] != []:
                    extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. '
                extra = extra[:-1]
            definitions.append((
                ', '.join(defn_raw['parts_of_speech']),
                '; '.join(defn_raw['english_definitions']),
                extra
            ))
        content = ''
        infobox_content = '''
            <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a> 
            and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a> 
            by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small><ul>
            '''
        for pos, engdef, extra in definitions:
            if pos == 'Wikipedia definition':
                infobox_content += '</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>'
            if pos == '':
                infobox_content += f"<li>{engdef}"
            else:
                infobox_content += f"<li><i>{pos}</i>: {engdef}"
            if extra != '':
                infobox_content += f" ({extra})"
            infobox_content += '</li>'
            content += f"{engdef}. "
        infobox_content += '</ul>'
        # For results, we'll return the URL, all alternative forms (as title),
        # and all definitions (as description) truncated to 300 characters.
        content = " ".join(f"{engdef}." for _, engdef, _ in definitions)
        results.append({
-            'url': urljoin(BASE_URL, page['slug']),
+            'url': result_url,
            'title': ", ".join(alt_forms),
            'content': content[:300] + (content[300:] and '...')
        })
        # Like Wordnik, we'll return the first result in an infobox too.
-        if not infoboxed:
+        if first_result:
-            infoboxed = True
+            first_result = False
-            infobox_urls = []
+            results.append(get_infobox(alt_forms, result_url, definitions))
            infobox_urls.append({
                'title': 'Jisho.org',
                'url': urljoin(BASE_URL, page['slug'])
            })
            infobox = {
                'infobox': alt_forms[0],
                'urls': infobox_urls
            }
            alt_forms.pop(0)
            alt_content = ''
            if len(alt_forms) > 0:
                alt_content = '<p><i>Other forms:</i> '
                alt_content += ", ".join(alt_forms)
                alt_content += '</p>'
            infobox['content'] = alt_content + infobox_content
            results.append(infobox)
    return results
 def get_definitions(page):
    """Build display-ready definitions from one Jisho result entry.

    Args:
        page: One entry of the API response (a dict). Its ``senses`` list
            is read; each sense may carry ``parts_of_speech``,
            ``english_definitions``, ``tags``, ``info`` and
            ``restrictions`` lists. Any of them may be missing or empty.

    Returns:
        A list with one ``(parts_of_speech, english_definitions, extra)``
        tuple of strings per sense. ``extra`` is a short note assembled
        from tags/info/restrictions, or ``''`` when there is none.
    """
    definitions = []
    # A missing or empty 'senses' yields no definitions instead of a KeyError.
    for sense in page.get('senses') or []:
        tags = sense.get('tags')
        info = sense.get('info')
        restrictions = sense.get('restrictions')

        # Extra data. These fields are not documented by the API, so the
        # handling below is based solely on the author's assumptions.
        notes = []
        if tags and info:
            # "usually written as kana: <kana>"
            notes.append(tags[0] + ', ' + info[0] + '.')
        elif tags:
            # abbreviation, archaism, etc.
            notes.append(', '.join(tags) + '.')
        elif info:
            # inconsistent
            notes.append(', '.join(info).capitalize() + '.')
        if restrictions:
            notes.append('Only applies to: ' + ', '.join(restrictions) + '.')

        definitions.append((
            ', '.join(sense.get('parts_of_speech') or []),
            '; '.join(sense.get('english_definitions') or []),
            # Sentences are separated by a single space, no trailing space.
            ' '.join(notes),
        ))
    return definitions
 def get_infobox(alt_forms, result_url, definitions):
    """Build the infobox result dict for one dictionary entry.

    Args:
        alt_forms: Non-empty list of written forms; the first one becomes
            the infobox title, the rest are listed as "Other forms".
        result_url: URL of the entry, used for the "Jisho.org" link.
        definitions: Iterable of ``(pos, engdef, extra)`` string tuples.

    Returns:
        A dict with the keys ``infobox`` (title), ``content`` (an HTML
        fragment) and ``urls`` (a single-item list of link dicts).

    Raises:
        IndexError: If ``alt_forms`` is empty.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    infobox_content = []

    # title & alt_forms
    infobox_title = alt_forms[0]
    if len(alt_forms) > 1:
        # Values originate from a remote API and are embedded in markup,
        # so they are escaped before interpolation.
        other_forms = ", ".join(escape(form) for form in alt_forms[1:])
        infobox_content.append(f'<p><i>Other forms:</i> {other_forms}</p>')

    # definitions
    infobox_content.append('''
        <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a> 
        and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a> 
        by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small>
        <ul>
    ''')
    for pos, engdef, extra in definitions:
        # Wikipedia-sourced senses get their own attribution and list.
        if pos == 'Wikipedia definition':
            infobox_content.append('</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>')
        pos_html = f'<i>{escape(pos)}</i>: ' if pos else ''
        extra_html = f' ({escape(extra)})' if extra else ''
        infobox_content.append(f'<li>{pos_html}{escape(engdef)}{extra_html}</li>')
    infobox_content.append('</ul>')

    # NOTE(review): the title and URL are returned raw because they are not
    # embedded in markup here; presumably the consumer escapes them - confirm.
    return {
        'infobox': infobox_title,
        'content': ''.join(infobox_content),
        'urls': [
            {
                'title': 'Jisho.org',
                'url': result_url,
            }
        ]
    }