mirror of
https://github.com/searxng/searxng.git
synced 2024-11-19 02:40:11 +01:00
update jisho.py according to suggestions
This commit is contained in:
parent
934ae4e086
commit
a399248f56
@ -14,9 +14,11 @@ about = {
|
|||||||
"use_official_api": True,
|
"use_official_api": True,
|
||||||
"require_api_key": False,
|
"require_api_key": False,
|
||||||
"results": 'JSON',
|
"results": 'JSON',
|
||||||
|
"language": 'ja',
|
||||||
}
|
}
|
||||||
|
|
||||||
categories = ['dictionaries']
|
categories = ['dictionaries']
|
||||||
|
engine_type = 'online_dictionary'
|
||||||
paging = False
|
paging = False
|
||||||
|
|
||||||
URL = 'https://jisho.org'
|
URL = 'https://jisho.org'
|
||||||
@ -35,91 +37,92 @@ def response(resp):
|
|||||||
results = []
|
results = []
|
||||||
infoboxed = False
|
infoboxed = False
|
||||||
|
|
||||||
search_results = json.loads(resp.text)
|
search_results = resp.json()
|
||||||
pages = search_results.get('data', [])
|
pages = search_results.get('data', [])
|
||||||
|
|
||||||
for page in pages:
|
for page in pages:
|
||||||
# Entries that are purely from Wikipedia are excluded.
|
# Entries that are purely from Wikipedia are excluded.
|
||||||
if page['senses'][0]['parts_of_speech'][0] != 'Wikipedia definition':
|
if page['senses'][0]['parts_of_speech'] != [] and page['senses'][0]['parts_of_speech'][0] == 'Wikipedia definition':
|
||||||
# Process alternative forms
|
pass
|
||||||
japanese = page['japanese']
|
# Process alternative forms
|
||||||
alt_forms = []
|
japanese = page['japanese']
|
||||||
for title_raw in japanese:
|
alt_forms = []
|
||||||
if 'word' not in title_raw:
|
for title_raw in japanese:
|
||||||
alt_forms.append(title_raw['reading'])
|
if 'word' not in title_raw:
|
||||||
else:
|
alt_forms.append(title_raw['reading'])
|
||||||
title = title_raw['word']
|
else:
|
||||||
if 'reading' in title_raw:
|
title = title_raw['word']
|
||||||
title += ' (' + title_raw['reading'] + ')'
|
if 'reading' in title_raw:
|
||||||
alt_forms.append(title)
|
title += ' (' + title_raw['reading'] + ')'
|
||||||
# Process definitions
|
alt_forms.append(title)
|
||||||
definitions = []
|
# Process definitions
|
||||||
def_raw = page['senses']
|
definitions = []
|
||||||
for defn_raw in def_raw:
|
def_raw = page['senses']
|
||||||
extra = ''
|
for defn_raw in def_raw:
|
||||||
if not infoboxed:
|
extra = ''
|
||||||
# Extra data. Since they're not documented, this implementation is based solely by the author's assumptions.
|
|
||||||
if defn_raw['tags'] != []:
|
|
||||||
if defn_raw['info'] != []:
|
|
||||||
extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ' # "usually written as kana: <kana>"
|
|
||||||
else:
|
|
||||||
extra += ', '.join(defn_raw['tags']) + '. ' # abbreviation, archaism, etc.
|
|
||||||
elif defn_raw['info'] != []:
|
|
||||||
extra += ', '.join(defn_raw['info']).capitalize() + '. ' # inconsistent
|
|
||||||
if defn_raw['restrictions'] != []:
|
|
||||||
extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. '
|
|
||||||
extra = extra[:-1]
|
|
||||||
definitions.append((
|
|
||||||
', '.join(defn_raw['parts_of_speech']),
|
|
||||||
'; '.join(defn_raw['english_definitions']),
|
|
||||||
extra
|
|
||||||
))
|
|
||||||
content = ''
|
|
||||||
infobox_content = '''
|
|
||||||
<small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>
|
|
||||||
and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>
|
|
||||||
by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small><ul>
|
|
||||||
'''
|
|
||||||
for pos, engdef, extra in definitions:
|
|
||||||
if pos == 'Wikipedia definition':
|
|
||||||
infobox_content += '</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>'
|
|
||||||
if pos == '':
|
|
||||||
infobox_content += f"<li>{engdef}"
|
|
||||||
else:
|
|
||||||
infobox_content += f"<li><i>{pos}</i>: {engdef}"
|
|
||||||
if extra != '':
|
|
||||||
infobox_content += f" ({extra})"
|
|
||||||
infobox_content += '</li>'
|
|
||||||
content += f"{engdef}. "
|
|
||||||
infobox_content += '</ul>'
|
|
||||||
|
|
||||||
# For results, we'll return the URL, all alternative forms (as title),
|
|
||||||
# and all definitions (as description) truncated to 300 characters.
|
|
||||||
results.append({
|
|
||||||
'url': urljoin(BASE_URL, page['slug']),
|
|
||||||
'title': ", ".join(alt_forms),
|
|
||||||
'content': content[:300] + (content[300:] and '...')
|
|
||||||
})
|
|
||||||
|
|
||||||
# Like Wordnik, we'll return the first result in an infobox too.
|
|
||||||
if not infoboxed:
|
if not infoboxed:
|
||||||
infoboxed = True
|
# Extra data. Since they're not documented, this implementation is based solely by the author's assumptions.
|
||||||
infobox_urls = []
|
if defn_raw['tags'] != []:
|
||||||
infobox_urls.append({
|
if defn_raw['info'] != []:
|
||||||
'title': 'Jisho.org',
|
extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ' # "usually written as kana: <kana>"
|
||||||
'url': urljoin(BASE_URL, page['slug'])
|
else:
|
||||||
})
|
extra += ', '.join(defn_raw['tags']) + '. ' # abbreviation, archaism, etc.
|
||||||
infobox = {
|
elif defn_raw['info'] != []:
|
||||||
'infobox': alt_forms[0],
|
extra += ', '.join(defn_raw['info']).capitalize() + '. ' # inconsistent
|
||||||
'urls': infobox_urls
|
if defn_raw['restrictions'] != []:
|
||||||
}
|
extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. '
|
||||||
alt_forms.pop(0)
|
extra = extra[:-1]
|
||||||
alt_content = ''
|
definitions.append((
|
||||||
if len(alt_forms) > 0:
|
', '.join(defn_raw['parts_of_speech']),
|
||||||
alt_content = '<p><i>Other forms:</i> '
|
'; '.join(defn_raw['english_definitions']),
|
||||||
alt_content += ", ".join(alt_forms)
|
extra
|
||||||
alt_content += '</p>'
|
))
|
||||||
infobox['content'] = alt_content + infobox_content
|
content = ''
|
||||||
results.append(infobox)
|
infobox_content = '''
|
||||||
|
<small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>
|
||||||
|
and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>
|
||||||
|
by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small><ul>
|
||||||
|
'''
|
||||||
|
for pos, engdef, extra in definitions:
|
||||||
|
if pos == 'Wikipedia definition':
|
||||||
|
infobox_content += '</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>'
|
||||||
|
if pos == '':
|
||||||
|
infobox_content += f"<li>{engdef}"
|
||||||
|
else:
|
||||||
|
infobox_content += f"<li><i>{pos}</i>: {engdef}"
|
||||||
|
if extra != '':
|
||||||
|
infobox_content += f" ({extra})"
|
||||||
|
infobox_content += '</li>'
|
||||||
|
content += f"{engdef}. "
|
||||||
|
infobox_content += '</ul>'
|
||||||
|
|
||||||
|
# For results, we'll return the URL, all alternative forms (as title),
|
||||||
|
# and all definitions (as description) truncated to 300 characters.
|
||||||
|
results.append({
|
||||||
|
'url': urljoin(BASE_URL, page['slug']),
|
||||||
|
'title': ", ".join(alt_forms),
|
||||||
|
'content': content[:300] + (content[300:] and '...')
|
||||||
|
})
|
||||||
|
|
||||||
|
# Like Wordnik, we'll return the first result in an infobox too.
|
||||||
|
if not infoboxed:
|
||||||
|
infoboxed = True
|
||||||
|
infobox_urls = []
|
||||||
|
infobox_urls.append({
|
||||||
|
'title': 'Jisho.org',
|
||||||
|
'url': urljoin(BASE_URL, page['slug'])
|
||||||
|
})
|
||||||
|
infobox = {
|
||||||
|
'infobox': alt_forms[0],
|
||||||
|
'urls': infobox_urls
|
||||||
|
}
|
||||||
|
alt_forms.pop(0)
|
||||||
|
alt_content = ''
|
||||||
|
if len(alt_forms) > 0:
|
||||||
|
alt_content = '<p><i>Other forms:</i> '
|
||||||
|
alt_content += ", ".join(alt_forms)
|
||||||
|
alt_content += '</p>'
|
||||||
|
infobox['content'] = alt_content + infobox_content
|
||||||
|
results.append(infobox)
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
Loading…
Reference in New Issue
Block a user