From 3bc85c511ce58d61aea4f5a2e8043643ec8da62a Mon Sep 17 00:00:00 2001
From: Bnyro
Date: Sat, 30 Sep 2023 13:44:30 +0200
Subject: [PATCH] [mod] tootfinder: implement python module

---
 searx/engines/tootfinder.py | 61 +++++++++++++++++++++++++++++++++++++
 searx/settings.yml          | 15 +---------
 2 files changed, 62 insertions(+), 14 deletions(-)
 create mode 100644 searx/engines/tootfinder.py

diff --git a/searx/engines/tootfinder.py b/searx/engines/tootfinder.py
new file mode 100644
index 000000000..b754e15a5
--- /dev/null
+++ b/searx/engines/tootfinder.py
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Tootfinder (social media)
+"""
+
+from datetime import datetime
+from json import loads
+from urllib.parse import quote
+from searx.utils import html_to_text
+
+about = {
+    'website': "https://www.tootfinder.ch",
+    'official_api_documentation': "https://wiki.tootfinder.ch/index.php?name=the-tootfinder-rest-api",
+    'use_official_api': True,
+    'require_api_key': False,
+    'results': "JSON",
+}
+categories = ['social media']
+
+base_url = "https://www.tootfinder.ch"
+
+
+def request(query, params):
+    """Set ``params['url']`` to the Tootfinder REST search URL for ``query`` and return ``params``."""
+    # the query is part of the URL path: percent-encode it so that characters like "#" or "?"
+    # are sent as search terms instead of being parsed as fragment / query-string delimiters
+    params['url'] = f"{base_url}/rest/api/search/{quote(query)}"
+    return params
+
+
+def response(resp):
+    """Convert the Tootfinder API response ``resp`` into a list of result dicts."""
+    results = []
+
+    # the API of tootfinder has an issue that errors on server side are appended to the API response as HTML
+    # thus we're only looking for the line that contains the actual json data and ignore everything else
+    json_str = next((line for line in resp.text.split("\n") if line.startswith("[{")), None)
+    if json_str is None:
+        # no line starts with "[{": loads("") would raise, so report no results instead
+        return results
+
+    for result in loads(json_str):
+        # use ``or`` fallbacks so that keys which are present but null are handled like missing ones
+        attachments = result.get('media_attachments') or []
+        thumbnail = next((item.get('preview_url') for item in attachments if item.get('type') == 'image'), None)
+
+        content = html_to_text(result['content'])
+        # prefer the title of the card, fall back to the first 75 characters of the text content
+        title = (result.get('card') or {}).get('title') or content[:75]
+
+        results.append(
+            {
+                'url': result['url'],
+                'title': title,
+                'content': content,
+                'thumbnail': thumbnail,
+                'publishedDate': datetime.strptime(result['created_at'], '%Y-%m-%d %H:%M:%S'),
+            }
+        )
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 828828b41..bbe28b516 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -2035,21 +2035,8 @@ engines:
     disabled: true
 
   - name: tootfinder
-    engine: json_engine
-    categories: ['social media']
-    paging: false
-    search_url: https://www.tootfinder.ch/rest/api/search/{query}
-    url_query: uri
-    title_query: card/title
-    content_query: content
-    thumbnail_query: card/image
+    engine: tootfinder
     shortcut: toot
-    about:
-      website: https://tootfinder.ch/
-      official_api_documentation: https://wiki.tootfinder.ch/index.php?name=the-tootfinder-rest-api
-      use_official_api: true
-      require_api_key: false
-      results: 'JSON'
 
   - name: wallhaven
     engine: wallhaven