update jisho.py according to suggestions

2024-11-19 02:40:11 +01:00 · 2022-04-01 09:18:19 -04:00 · 2022-04-01 09:18:19 -04:00 · a399248f56
commit a399248f56
parent 934ae4e086
1 changed files with 84 additions and 81 deletions
--- a/searx/engines/jisho.py
+++ b/searx/engines/jisho.py
@ -14,9 +14,11 @@ about = {
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
    "language": 'ja',
 }
 categories = ['dictionaries']
 engine_type = 'online_dictionary'
 paging = False
 URL = 'https://jisho.org'
@ -35,91 +37,92 @@ def response(resp):
    results = []
    infoboxed = False
-    search_results = json.loads(resp.text)
+    search_results = resp.json()
    pages = search_results.get('data', [])
    for page in pages:
        # Entries that are purely from Wikipedia are excluded.
-        if page['senses'][0]['parts_of_speech'][0] != 'Wikipedia definition':
+        if page['senses'][0]['parts_of_speech'] != [] and page['senses'][0]['parts_of_speech'][0] == 'Wikipedia definition':
-            # Process alternative forms
+            pass
-            japanese = page['japanese']
+        # Process alternative forms
-            alt_forms = []
+        japanese = page['japanese']
-            for title_raw in japanese:
+        alt_forms = []
-                if 'word' not in title_raw:
+        for title_raw in japanese:
-                    alt_forms.append(title_raw['reading'])
+            if 'word' not in title_raw:
-                else:
+                alt_forms.append(title_raw['reading'])
-                    title = title_raw['word']
+            else:
-                    if 'reading' in title_raw:
+                title = title_raw['word']
-                        title += ' (' + title_raw['reading'] + ')'
+                if 'reading' in title_raw:
-                    alt_forms.append(title)
+                    title += ' (' + title_raw['reading'] + ')'
-            # Process definitions
+                alt_forms.append(title)
-            definitions = []
+        # Process definitions
-            def_raw = page['senses']
+        definitions = []
-            for defn_raw in def_raw:
+        def_raw = page['senses']
-                extra = ''
+        for defn_raw in def_raw:
-                if not infoboxed:
+            extra = ''
                    # Extra data. Since they're not documented, this implementation is based solely on the author's assumptions.
                    if defn_raw['tags'] != []:
                        if defn_raw['info'] != []:
                            extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ' # "usually written as kana: <kana>"
                        else:
                            extra += ', '.join(defn_raw['tags']) + '. ' # abbreviation, archaism, etc.
                    elif defn_raw['info'] != []:
                        extra += ', '.join(defn_raw['info']).capitalize() + '. ' # inconsistent
                    if defn_raw['restrictions'] != []:
                        extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. '
                    extra = extra[:-1]
                definitions.append((
                    ', '.join(defn_raw['parts_of_speech']),
                    '; '.join(defn_raw['english_definitions']),
                    extra
                ))
            content = ''
            infobox_content = '''
                <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a> 
                and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a> 
                by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small><ul>
                '''
            for pos, engdef, extra in definitions:
                if pos == 'Wikipedia definition':
                    infobox_content += '</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>'
                if pos == '':
                    infobox_content += f"<li>{engdef}"
                else:
                    infobox_content += f"<li><i>{pos}</i>: {engdef}"
                if extra != '':
                    infobox_content += f" ({extra})"
                infobox_content += '</li>'
                content += f"{engdef}. "
            infobox_content += '</ul>'
            # For results, we'll return the URL, all alternative forms (as title),
            # and all definitions (as description) truncated to 300 characters.
            results.append({
                'url': urljoin(BASE_URL, page['slug']),
                'title': ", ".join(alt_forms),
                'content': content[:300] + (content[300:] and '...')
            })
            # Like Wordnik, we'll return the first result in an infobox too.
            if not infoboxed:
-                infoboxed = True
+                # Extra data. Since they're not documented, this implementation is based solely on the author's assumptions.
-                infobox_urls = []
+                if defn_raw['tags'] != []:
-                infobox_urls.append({
+                    if defn_raw['info'] != []:
-                    'title': 'Jisho.org',
+                        extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ' # "usually written as kana: <kana>"
-                    'url': urljoin(BASE_URL, page['slug'])
+                    else:
-                })
+                        extra += ', '.join(defn_raw['tags']) + '. ' # abbreviation, archaism, etc.
-                infobox = {
+                elif defn_raw['info'] != []:
-                    'infobox': alt_forms[0],
+                    extra += ', '.join(defn_raw['info']).capitalize() + '. ' # inconsistent
-                    'urls': infobox_urls
+                if defn_raw['restrictions'] != []:
-                }
+                    extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. '
-                alt_forms.pop(0)
+                extra = extra[:-1]
-                alt_content = ''
+            definitions.append((
-                if len(alt_forms) > 0:
+                ', '.join(defn_raw['parts_of_speech']),
-                    alt_content = '<p><i>Other forms:</i> '
+                '; '.join(defn_raw['english_definitions']),
-                    alt_content += ", ".join(alt_forms)
+                extra
-                    alt_content += '</p>'
+            ))
-                infobox['content'] = alt_content + infobox_content
+        content = ''
-                results.append(infobox)
+        infobox_content = '''
            <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a> 
            and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a> 
            by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small><ul>
            '''
        for pos, engdef, extra in definitions:
            if pos == 'Wikipedia definition':
                infobox_content += '</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>'
            if pos == '':
                infobox_content += f"<li>{engdef}"
            else:
                infobox_content += f"<li><i>{pos}</i>: {engdef}"
            if extra != '':
                infobox_content += f" ({extra})"
            infobox_content += '</li>'
            content += f"{engdef}. "
        infobox_content += '</ul>'
        # For results, we'll return the URL, all alternative forms (as title),
        # and all definitions (as description) truncated to 300 characters.
        results.append({
            'url': urljoin(BASE_URL, page['slug']),
            'title': ", ".join(alt_forms),
            'content': content[:300] + (content[300:] and '...')
        })
        # Like Wordnik, we'll return the first result in an infobox too.
        if not infoboxed:
            infoboxed = True
            infobox_urls = []
            infobox_urls.append({
                'title': 'Jisho.org',
                'url': urljoin(BASE_URL, page['slug'])
            })
            infobox = {
                'infobox': alt_forms[0],
                'urls': infobox_urls
            }
            alt_forms.pop(0)
            alt_content = ''
            if len(alt_forms) > 0:
                alt_content = '<p><i>Other forms:</i> '
                alt_content += ", ".join(alt_forms)
                alt_content += '</p>'
            infobox['content'] = alt_content + infobox_content
            results.append(infobox)
    return results