mirror of
https://github.com/searxng/searxng.git
synced 2024-11-17 18:00:12 +01:00
[feat] engine: implementation of German news, Tagesschau
Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
2256ba2ffb
commit
e25d1c7288
101
searx/engines/tagesschau.py
Normal file
101
searx/engines/tagesschau.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# lint: pylint
|
||||||
|
"""ARD: `Tagesschau API`_
|
||||||
|
|
||||||
|
The Tagesschau is a news program of the ARD. Via the `Tagesschau API`_, current
|
||||||
|
news and media reports are available in JSON format. The `Bundesstelle für Open
|
||||||
|
Data`_ offers an `OpenAPI`_ portal at bundDEV_ where APIs are documented and can
|
||||||
|
be tested.
|
||||||
|
|
||||||
|
This SearXNG engine uses the `/api2u/search`_ API.
|
||||||
|
|
||||||
|
.. _/api2u/search: http://tagesschau.api.bund.dev/
|
||||||
|
.. _bundDEV: https://bund.dev/apis
|
||||||
|
.. _Bundesstelle für Open Data: https://github.com/bundesAPI
|
||||||
|
.. _Tagesschau API: https://github.com/AndreasFischer1985/tagesschau-api/blob/main/README_en.md
|
||||||
|
.. _OpenAPI: https://swagger.io/specification/
|
||||||
|
|
||||||
|
"""
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
import re
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger: logging.Logger
|
||||||
|
|
||||||
|
# Descriptive metadata about the upstream service this engine queries.
# NOTE(review): presumably consumed by SearXNG's engine loader / preferences
# page -- the consumer is not visible in this file.
about = {
    'website': "https://tagesschau.de",
    'wikidata_id': "Q703907",
    # No documentation URL is registered here, although the module docstring
    # links to API documentation.
    'official_api_documentation': None,
    'use_official_api': True,
    'require_api_key': False,
    # The search response is decoded with ``resp.json()`` in ``response()``.
    'results': 'JSON',
    'language': 'de',
}
|
||||||
|
# NOTE(review): presumably the SearXNG categories (tabs) this engine is
# listed under -- the consumer is not visible in this file.
categories = ['general', 'news']
# ``request()`` reads ``params['pageno']``; presumably this flag tells
# SearXNG that the engine supports paging -- confirm against the engine docs.
paging = True

# Sent to the API as ``pageSize`` by ``request()``.
results_per_page = 10
# Prefix of the search URL built in ``request()``.
base_url = "https://www.tagesschau.de"
|
||||||
|
|
||||||
|
|
||||||
|
def request(query, params):
    """Fill ``params['url']`` with the Tagesschau search URL for *query*.

    :param query: search terms, sent as the ``searchText`` argument.
    :param params: request parameters; ``params['pageno']`` is read and
        ``params['url']`` is written.
    :return: the same *params* dict, with ``'url'`` set.
    """
    query_string = urlencode(
        {
            'searchText': query,
            'pageSize': results_per_page,
            # 'pageno' is decremented by one before being sent as 'resultPage'
            'resultPage': params['pageno'] - 1,
        }
    )
    params['url'] = f"{base_url}/api2u/search?{query_string}"
    return params
|
||||||
|
|
||||||
|
|
||||||
|
def response(resp):
    """Turn the JSON search response into a list of result dicts.

    Every entry of ``searchResults`` is dispatched on its ``type``:
    ``story`` and ``webview`` entries are converted by ``_story``, ``video``
    entries by ``_video``.  Entries of any other type are logged as an error
    and left out of the returned list.

    :param resp: response object whose ``json()`` method returns the decoded
        payload.
    :return: list of result dicts, in the order of ``searchResults``.
    """
    found = []

    for entry in resp.json()['searchResults']:
        kind = entry.get('type')

        if kind == 'video':
            found.append(_video(entry))
            continue

        if kind in ('story', 'webview'):
            found.append(_story(entry))
            continue

        logger.error("unknow result type: %s", kind)

    return found
|
||||||
|
|
||||||
|
|
||||||
|
def _story(item):
|
||||||
|
return {
|
||||||
|
'title': item['title'],
|
||||||
|
'thumbnail': item.get('teaserImage', {}).get('imageVariants', {}).get('16x9-256'),
|
||||||
|
'publishedDate': datetime.strptime(item['date'][:19], '%Y-%m-%dT%H:%M:%S'),
|
||||||
|
'content': item['firstSentence'],
|
||||||
|
'url': item['shareURL'],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _video(item):
|
||||||
|
video_url = item['streams']['h264s']
|
||||||
|
title = item['title']
|
||||||
|
|
||||||
|
if "_vapp.mxf" in title:
|
||||||
|
title = title.replace("_vapp.mxf", "")
|
||||||
|
title = re.sub(r"APP\d+ (FC-)?", "", title, count=1)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'template': 'videos.html',
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': item.get('teaserImage', {}).get('imageVariants', {}).get('16x9-256'),
|
||||||
|
'publishedDate': datetime.strptime(item['date'][:19], '%Y-%m-%dT%H:%M:%S'),
|
||||||
|
'content': item.get('firstSentence', ''),
|
||||||
|
'iframe_src': video_url,
|
||||||
|
'url': video_url,
|
||||||
|
}
|
@ -1431,6 +1431,11 @@ engines:
|
|||||||
# WHERE title LIKE :wildcard OR description LIKE :wildcard
|
# WHERE title LIKE :wildcard OR description LIKE :wildcard
|
||||||
# ORDER BY duration DESC
|
# ORDER BY duration DESC
|
||||||
|
|
||||||
|
- name: tagesschau
|
||||||
|
engine: tagesschau
|
||||||
|
shortcut: ts
|
||||||
|
disabled: true
|
||||||
|
|
||||||
# Requires Tor
|
# Requires Tor
|
||||||
- name: torch
|
- name: torch
|
||||||
engine: xpath
|
engine: xpath
|
||||||
|
Loading…
Reference in New Issue
Block a user