mirror of
https://github.com/searxng/searxng.git
synced 2024-11-19 10:50:09 +01:00
61 lines
1.7 KiB
Python
61 lines
1.7 KiB
Python
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||
|
# lint: pylint
|
||
|
"""Tootfinder (social media)
|
||
|
"""
|
||
|
|
||
|
from datetime import datetime
from json import loads
from urllib.parse import quote

from searx.utils import html_to_text
|
||
|
|
||
|
# Descriptive metadata about the upstream service this engine queries.
about = {
    'website': "https://www.tootfinder.ch",
    'official_api_documentation': "https://wiki.tootfinder.ch/index.php?name=the-tootfinder-rest-api",
    'use_official_api': True,
    'require_api_key': False,
    'results': "JSON",
}

# Result categories this engine is assigned to.
categories = ['social media']

# Root of the Tootfinder site; the REST search path is appended to it in request().
base_url = "https://www.tootfinder.ch"
|
||
|
|
||
|
|
||
|
def request(query, params):
    """Build the Tootfinder search request for ``query``.

    The search term becomes part of the URL *path*, so it is percent-encoded
    first.  Without that, a ``?`` or ``#`` in the term would start a query
    string or fragment and silently cut the search term short.  ``/`` is left
    unescaped (the default of ``quote``) so that terms containing slashes keep
    producing the same URL as before.

    :param query: search term to look up.
    :param params: request parameter mapping; its ``'url'`` entry is set here.
    :return: the same ``params`` object that was passed in.
    """
    params['url'] = f"{base_url}/rest/api/search/{quote(query)}"
    return params
|
||
|
|
||
|
|
||
|
def response(resp):
    """Convert a Tootfinder API response into a list of result dicts.

    :param resp: response object; only its ``text`` attribute is read.
    :return: list of dicts with the keys ``url``, ``title``, ``content``,
        ``thumbnail`` and ``publishedDate``.  The list is empty when the body
        contains no JSON result line.
    :raises KeyError: if a result lacks ``url``, ``content`` or ``created_at``.
    :raises ValueError: if ``created_at`` is not in ``%Y-%m-%d %H:%M:%S`` format.
    """
    # the API of tootfinder has an issue that errors on server side are appended to the API response as HTML
    # thus we're only looking for the line that contains the actual json data and ignore everything else
    json_str = next((line for line in resp.text.split("\n") if line.startswith("[{")), None)
    if json_str is None:
        # No result line at all (e.g. an empty ``[]`` or an error-only body):
        # report "no results" instead of letting loads("") raise.
        return []

    results = []
    for result in loads(json_str):
        # ``or`` also covers a key that is present but null, which the
        # default argument of ``dict.get`` alone would not.
        attachments = result.get('media_attachments') or []
        images = [
            attachment['preview_url']
            for attachment in attachments
            if attachment.get('type') == 'image' and attachment.get('preview_url')
        ]
        thumbnail = images[0] if images else None

        # Strip the HTML once and reuse it for both the title fallback and the content.
        content = html_to_text(result['content'])

        # Prefer the title of the attached link card; otherwise use the
        # beginning of the post text.
        title = (result.get('card') or {}).get('title') or content[:75]

        results.append(
            {
                'url': result['url'],
                'title': title,
                'content': content,
                'thumbnail': thumbnail,
                'publishedDate': datetime.strptime(result['created_at'], '%Y-%m-%d %H:%M:%S'),
            }
        )

    return results
|