# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This engine implements *Tineye - reverse image search*
Using TinEye, you can search by image or perform what we call a reverse image
search. You can do that by uploading an image or searching by URL. You can also
simply drag and drop your images to start your search. TinEye constantly crawls
the web and adds images to its index. Today, the TinEye index is over 50.2
billion images `[tineye.com] <https://tineye.com/how>`_.
.. hint::
This SearXNG engine only supports *'searching by URL'* and it does not use
the official API `[api.tineye.com] <https://api.tineye.com/python/docs/>`_.
"""
from urllib.parse import urlencode
from datetime import datetime

about = {
    "website": 'https://tineye.com',
    "wikidata_id": 'Q2382535',
    "official_api_documentation": 'https://api.tineye.com/python/docs/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

engine_type = 'online_url_search'
categories = ['general']
paging = True
safesearch = False
base_url = 'https://tineye.com'
search_string = '/result_json/?page={page}&{query}'
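
# A minimal sketch of a settings.yml entry enabling this engine; the
# ``shortcut`` and ``timeout`` values are illustrative assumptions, not taken
# from the shipped default configuration:
#
#   - name: tineye
#     engine: tineye
#     shortcut: tin
#     timeout: 9.0
#     disabled: true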


def request(query, params):
    # prefer the data-URL form of the image if SearXNG extracted one from the
    # query, otherwise fall back to a plain http(s) image URL
    if params['search_urls']['data:image']:
        query = params['search_urls']['data:image']
    elif params['search_urls']['http']:
        query = params['search_urls']['http']

    query = urlencode({'url': query})

    # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py
    params['url'] = base_url + search_string.format(query=query, page=params['pageno'])

    params['headers'].update(
        {
            'Connection': 'keep-alive',
            'Accept-Encoding': 'gzip, deflate, br',
            'Host': 'tineye.com',
            'DNT': '1',
            'TE': 'trailers',
        }
    )

    return params
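
# Example (illustrative, with a made-up image URL): for
# https://example.org/image.jpg and pageno=1, request() above builds
#   https://tineye.com/result_json/?page=1&url=https%3A%2F%2Fexample.org%2Fimage.jpg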


def response(resp):
    results = []

    # Define wanted results
    json_data = resp.json()
    number_of_results = json_data['num_matches']

    for i in json_data['matches']:
        image_format = i['format']
        width = i['width']
        height = i['height']
        thumbnail_src = i['image_url']
        # only the first backlink of the first domain is reported
        backlink = i['domains'][0]['backlinks'][0]

        url = backlink['backlink']
        source = backlink['url']
        title = backlink['image_name']
        img_src = backlink['url']

        # Get and convert published date
        api_date = backlink['crawl_date'][:-3]
        publishedDate = datetime.fromisoformat(api_date)

        # Append results
        results.append(
            {
                'template': 'images.html',
                'url': url,
                'thumbnail_src': thumbnail_src,
                'source': source,
                'title': title,
                'img_src': img_src,
                'format': image_format,
                'width': width,
                'height': height,
                'publishedDate': publishedDate,
            }
        )

    # Append number of results
    results.append({'number_of_results': number_of_results})

    return results
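
# A rough sketch of the JSON structure response() expects from the
# ``/result_json/`` endpoint, reduced to the fields read above; the live
# payload may carry additional data:
#
#   {
#     "num_matches": <int>,
#     "matches": [
#       {
#         "format": <str>, "width": <int>, "height": <int>,
#         "image_url": <thumbnail URL>,
#         "domains": [
#           {"backlinks": [
#             {"backlink": <page URL>, "url": <image URL>,
#              "image_name": <str>, "crawl_date": <timestamp>}
#           ]}
#         ]
#       }
#     ]
#   }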