searxng/searx/engines/invidious.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""Invidious (Videos)
"""

import time
import random
from urllib.parse import quote_plus, urlparse
from dateutil import parser

from searx.utils import humanize_number

# Engine metadata: upstream website, Wikidata entity, API documentation link
# and the kind of API / result format this engine talks to.
about = {
    "website": 'https://api.invidious.io/',
    "wikidata_id": 'Q79343316',
    "official_api_documentation": 'https://github.com/iv-org/documentation/blob/master/API.md',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

# Engine dependent config.
# ``paging``: request() forwards ``params["pageno"]`` as the ``page`` URL argument.
# ``time_range_support``: request() maps ``params["time_range"]`` to the ``date``
# URL argument.
categories = ["videos", "music"]
paging = True
time_range_support = True

# URL of the Invidious instance to query.  May be overwritten in settings.yml,
# either by a single URL or by a list of URLs; with a list, request() picks one
# entry at random for every query.
base_url = 'https://vid.puffyan.us'


def request(query, params):
    """Build the Invidious search request for *query*.

    The instance to query is taken from the module level ``base_url`` (a random
    entry is picked when it is a list) and stored in ``params["base_url"]``, so
    the code handling the answer can build its links from the very same
    instance instead of re-reading the shared module variable.

    :param query: raw search terms; they are URL-quoted here.
    :param params: request parameters.  ``pageno``, ``time_range`` and
        ``language`` are read; ``base_url`` and ``url`` are written.
    :return: the ``params`` dict that was passed in, updated in place.
    """
    # SearXNG time range name -> value of the Invidious ``date`` argument
    time_range_dict = {
        "day": "today",
        "week": "week",
        "month": "month",
        "year": "year",
    }

    if isinstance(base_url, list):
        params["base_url"] = random.choice(base_url)
    else:
        params["base_url"] = base_url

    # The instance URL is passed as a format *argument* so that a URL which
    # happens to contain braces cannot break the formatting.
    url = "{base}/api/v1/search?q={query}&page={pageno}".format(
        base=params["base_url"],
        query=quote_plus(query),
        pageno=params["pageno"],
    )

    date_filter = time_range_dict.get(params["time_range"])
    if date_filter:
        url += "&date={timerange}".format(timerange=date_filter)

    if params["language"] != "all":
        lang = params["language"].split("-")
        # Only a plain ``<language>-<COUNTRY>`` locale carries a country code.
        # The Invidious search API expects it in the ``region`` argument
        # (ISO 3166 country code); ``range`` is not a documented argument.
        if len(lang) == 2:
            url += "&region={region}".format(region=lang[1])

    params["url"] = url
    return params


def _format_length(seconds):
    """Format a duration given in seconds as ``MM:SS`` or ``HH:MM:SS``.

    A missing, ``None`` or otherwise unusable value is treated as ``0``.
    ``time.gmtime()`` is deliberately not used: it returns the *current* time
    for ``None`` and wraps the hour field around for durations of 24h or more.
    """
    try:
        total = max(int(seconds or 0), 0)
    except (TypeError, ValueError):
        total = 0

    minutes, secs = divmod(total, 60)
    hours, minutes = divmod(minutes, 60)
    if hours:
        return "{:02d}:{:02d}:{:02d}".format(hours, minutes, secs)
    return "{:02d}:{:02d}".format(minutes, secs)


def _pick_thumbnail(thumbs, instance_url):
    """Return the URL of the ``sddefault`` thumbnail, or ``""`` if there is none.

    Some instances return a partial (host-less) thumbnail URL; in that case
    *instance_url* is prepended to make it absolute.
    """
    thumb = next((th for th in thumbs if th.get("quality") == "sddefault"), None)
    thumbnail = thumb.get("url", "") if thumb else ""
    if thumbnail and not urlparse(thumbnail).netloc:
        thumbnail = instance_url + thumbnail
    return thumbnail


def response(resp):
    """Convert the JSON answer of an Invidious search into video results.

    Only items of type ``video`` that carry a ``videoId`` are kept.  All links
    (watch page, embed URL, partial thumbnail URLs) are built from
    ``resp.search_params['base_url']``, i.e. from the instance that was
    actually queried.

    :param resp: HTTP response; ``resp.json()`` must yield the list of search
        items and ``resp.search_params`` the parameters of the request.
    :return: list of result dicts for the ``videos.html`` template.
    """
    results = []

    search_results = resp.json()
    instance_url = resp.search_params['base_url']

    for result in search_results:
        if result.get("type") != "video":
            continue

        videoid = result.get("videoId")
        if not videoid:
            continue

        # ``or 0`` also covers an explicit ``null``: time.ctime(None) would
        # silently yield the current time instead of the epoch default.
        published = result.get("published") or 0

        results.append(
            {
                "url": instance_url + "/watch?v=" + videoid,
                "title": result.get("title", ""),
                "content": result.get("description", ""),
                "length": _format_length(result.get("lengthSeconds")),
                # a missing counter must not discard the whole result page
                "views": humanize_number(result.get("viewCount") or 0),
                "template": "videos.html",
                "author": result.get("author"),
                "publishedDate": parser.parse(time.ctime(published)),
                "iframe_src": instance_url + '/embed/' + videoid,
                "thumbnail": _pick_thumbnail(result.get("videoThumbnails") or [], instance_url),
            }
        )

    return results
[enh] engines: add about variable move meta information from comment to the about variable so the preferences, the documentation can show these information 2021-01-13 11:31:25 +01:00			`# SPDX-License-Identifier: AGPL-3.0-or-later`
[pylint] engines/invidious.py Fix remarks from pylint and remove usless comments Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2022-02-04 15:20:57 +01:00			`"""Invidious (Videos)`
[enh] engines: add about variable move meta information from comment to the about variable so the preferences, the documentation can show these information 2021-01-13 11:31:25 +01:00			`"""`
[enh] add invidious engine. (#1657) closes #1372 2019-08-02 13:25:25 +02:00
			`import time`
[fix] Invidious engine by enabling requests by randomly picking amongst working instances 2021-01-08 13:27:54 +01:00			`import random`
[fix] engine - invidious thumbnails 2023-09-13 07:36:41 +02:00			`from urllib.parse import quote_plus, urlparse`
[pylint] engines/invidious.py Fix remarks from pylint and remove usless comments Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2022-02-04 15:20:57 +01:00			`from dateutil import parser`
[enh] add invidious engine. (#1657) closes #1372 2019-08-02 13:25:25 +02:00
[feat] videos template: support for view count 2024-07-20 21:27:12 +02:00			`from searx.utils import humanize_number`

[enh] engines: add about variable move meta information from comment to the about variable so the preferences, the documentation can show these information 2021-01-13 11:31:25 +01:00			`# about`
			`about = {`
[fix] Update about section of Invidious Another website and new documentation 2021-09-01 20:55:06 +02:00			`"website": 'https://api.invidious.io/',`
[enh] engines: add about variable move meta information from comment to the about variable so the preferences, the documentation can show these information 2021-01-13 11:31:25 +01:00			`"wikidata_id": 'Q79343316',`
[fix] Update about section of Invidious Another website and new documentation 2021-09-01 20:55:06 +02:00			`"official_api_documentation": 'https://github.com/iv-org/documentation/blob/master/API.md',`
[enh] engines: add about variable move meta information from comment to the about variable so the preferences, the documentation can show these information 2021-01-13 11:31:25 +01:00			`"use_official_api": True,`
			`"require_api_key": False,`
			`"results": 'JSON',`
			`}`

[enh] add invidious engine. (#1657) closes #1372 2019-08-02 13:25:25 +02:00			`# engine dependent config`
			`categories = ["videos", "music"]`
			`paging = True`
			`time_range_support = True`

[fix] invidious engine: store random base_url in param Two different threads ( = two different user queries) can call the request function in a row and then the response function. The namespace will be same since this is the same engine. To keep exactly the same value ``base_url`` must be stored in params and then retrieve using ``resp.search_params["base_url"]``. Suggested-by: @dalf https://github.com/searxng/searxng/pull/862#discussion_r799324861 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2022-02-04 15:16:23 +01:00			`# base_url can be overwritten by a list of URLs in the settings.yml`
			`base_url = 'https://vid.puffyan.us'`
[enh] add invidious engine. (#1657) closes #1372 2019-08-02 13:25:25 +02:00

			`def request(query, params):`
			`time_range_dict = {`
			`"day": "today",`
			`"week": "week",`
			`"month": "month",`
			`"year": "year",`
			`}`
[fix] Invidious engine by enabling requests by randomly picking amongst working instances 2021-01-08 13:27:54 +01:00
			`if isinstance(base_url, list):`
[fix] invidious engine: store random base_url in param Two different threads ( = two different user queries) can call the request function in a row and then the response function. The namespace will be same since this is the same engine. To keep exactly the same value ``base_url`` must be stored in params and then retrieve using ``resp.search_params["base_url"]``. Suggested-by: @dalf https://github.com/searxng/searxng/pull/862#discussion_r799324861 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2022-02-04 15:16:23 +01:00			`params["base_url"] = random.choice(base_url)`
[fix] Invidious engine by enabling requests by randomly picking amongst working instances 2021-01-08 13:27:54 +01:00			`else:`
[fix] invidious engine: store random base_url in param Two different threads ( = two different user queries) can call the request function in a row and then the response function. The namespace will be same since this is the same engine. To keep exactly the same value ``base_url`` must be stored in params and then retrieve using ``resp.search_params["base_url"]``. Suggested-by: @dalf https://github.com/searxng/searxng/pull/862#discussion_r799324861 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2022-02-04 15:16:23 +01:00			`params["base_url"] = base_url`
[fix] Invidious engine by enabling requests by randomly picking amongst working instances 2021-01-08 13:27:54 +01:00
[fix] invidious engine: store random base_url in param Two different threads ( = two different user queries) can call the request function in a row and then the response function. The namespace will be same since this is the same engine. To keep exactly the same value ``base_url`` must be stored in params and then retrieve using ``resp.search_params["base_url"]``. Suggested-by: @dalf https://github.com/searxng/searxng/pull/862#discussion_r799324861 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2022-02-04 15:16:23 +01:00			`search_url = params["base_url"] + "/api/v1/search?q={query}"`
[format.python] initial formatting of the python code This patch was generated by black [1]:: make format.python [1] https://github.com/psf/black Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-12-27 09:26:22 +01:00			`params["url"] = search_url.format(query=quote_plus(query)) + "&page={pageno}".format(pageno=params["pageno"])`
[enh] add invidious engine. (#1657) closes #1372 2019-08-02 13:25:25 +02:00
			`if params["time_range"] in time_range_dict:`
[format.python] initial formatting of the python code This patch was generated by black [1]:: make format.python [1] https://github.com/psf/black Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-12-27 09:26:22 +01:00			`params["url"] += "&date={timerange}".format(timerange=time_range_dict[params["time_range"]])`
[enh] add invidious engine. (#1657) closes #1372 2019-08-02 13:25:25 +02:00
			`if params["language"] != "all":`
			`lang = params["language"].split("-")`
			`if len(lang) == 2:`
			`params["url"] += "&range={lrange}".format(lrange=lang[1])`

			`return params`


			`def response(resp):`
			`results = []`

			`search_results = resp.json()`
[fix] invidious engine: store random base_url in param Two different threads ( = two different user queries) can call the request function in a row and then the response function. The namespace will be same since this is the same engine. To keep exactly the same value ``base_url`` must be stored in params and then retrieve using ``resp.search_params["base_url"]``. Suggested-by: @dalf https://github.com/searxng/searxng/pull/862#discussion_r799324861 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2022-02-04 15:16:23 +01:00			`base_invidious_url = resp.search_params['base_url'] + "/watch?v="`
[enh] add invidious engine. (#1657) closes #1372 2019-08-02 13:25:25 +02:00
			`for result in search_results:`
			`rtype = result.get("type", None)`
			`if rtype == "video":`
			`videoid = result.get("videoId", None)`
			`if not videoid:`
			`continue`

			`url = base_invidious_url + videoid`
			`thumbs = result.get("videoThumbnails", [])`
[format.python] initial formatting of the python code This patch was generated by black [1]:: make format.python [1] https://github.com/psf/black Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-12-27 09:26:22 +01:00			`thumb = next((th for th in thumbs if th["quality"] == "sddefault"), None)`
[enh] add invidious engine. (#1657) closes #1372 2019-08-02 13:25:25 +02:00			`if thumb:`
			`thumbnail = thumb.get("url", "")`
			`else:`
			`thumbnail = ""`

[fix] engine - invidious thumbnails 2023-09-13 07:36:41 +02:00			`# some instances return a partial thumbnail url`
			`# we check if the url is partial, and prepend the base_url if it is`
			`if thumbnail and not urlparse(thumbnail).netloc:`
			`thumbnail = resp.search_params['base_url'] + thumbnail`

[format.python] initial formatting of the python code This patch was generated by black [1]:: make format.python [1] https://github.com/psf/black Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2021-12-27 09:26:22 +01:00			`publishedDate = parser.parse(time.ctime(result.get("published", 0)))`
include length in invidious results 2020-08-02 13:31:04 +02:00			`length = time.gmtime(result.get("lengthSeconds"))`
			`if length.tm_hour:`
			`length = time.strftime("%H:%M:%S", length)`
			`else:`
			`length = time.strftime("%M:%S", length)`
[enh] add invidious engine. (#1657) closes #1372 2019-08-02 13:25:25 +02:00
			`results.append(`
			`{`
			`"url": url,`
			`"title": result.get("title", ""),`
			`"content": result.get("description", ""),`
[feat] videos template: support for view count 2024-07-20 21:27:12 +02:00			`"length": length,`
			`"views": humanize_number(result['viewCount']),`
[enh] add invidious engine. (#1657) closes #1372 2019-08-02 13:25:25 +02:00			`"template": "videos.html",`
include author in invidious results 2020-08-02 13:30:38 +02:00			`"author": result.get("author"),`
[enh] add invidious engine. (#1657) closes #1372 2019-08-02 13:25:25 +02:00			`"publishedDate": publishedDate,`
[mod] templates: rename field for <iframe> URL to iframe_src Rename result field data_src to iframe_src Suggested-by: @dalf https://github.com/searxng/searxng/pull/882#issuecomment-1037997402 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de> 2022-02-13 16:12:46 +01:00			`"iframe_src": resp.search_params['base_url'] + '/embed/' + videoid,`
[enh] add invidious engine. (#1657) closes #1372 2019-08-02 13:25:25 +02:00			`"thumbnail": thumbnail,`
			`}`
			`)`

			`return results`