searxng/searx/engines/digbt.py

"""
 DigBT (Videos, Music, Files)

 @website     https://digbt.org
 @provide-api no

 @using-api   no
 @results     HTML (using search portal)
 @stable      no (HTML can change)
 @parse       url, title, content, magnetlink
"""

from urllib.parse import quote, urljoin
from lxml import html
from searx.utils import extract_text, get_torrent_size


# Engine categories (presumably the search tabs this engine is listed under;
# confirm against the engine loader).
categories = ['videos', 'music', 'files']
# request() reads params['pageno'] and places it in the search URL.
paging = True

# Site root; response() also uses it as the base for resolving result links.
URL = 'https://digbt.org'
# URL template that request() fills in with the query and the page number.
SEARCH_URL = URL + '/search/{query}-time-{pageno}'
# Indices into the whitespace-split text of a result's "tail" div. The tokens
# at these positions are passed to get_torrent_size() as the size value and
# the size multiplier (presumably the unit, e.g. "MB" - confirm against the
# site's markup).
FILESIZE = 3
FILESIZE_MULTIPLIER = 4


def request(query, params):
    """Set the DigBT search URL for *query* on the requested page.

    The query is percent-encoded before it is placed in the URL path, so
    characters such as ``/``, ``?``, ``#`` or ``%`` stay part of the search
    term instead of changing the path, starting a query string or cutting
    the URL short.

    :param query: the search terms.
    :param params: request parameters; ``params['pageno']`` is read and
        ``params['url']`` is written.
    :return: the same ``params`` mapping, with ``'url'`` filled in.
    """
    # safe='' also escapes "/", which quote() leaves untouched by default.
    encoded_query = quote(query, safe='')
    params['url'] = SEARCH_URL.format(query=encoded_query, pageno=params['pageno'])

    return params


def response(resp):
    """Parse a DigBT result page into a list of torrent result dicts.

    Every ``<td class="x-item">`` cell is treated as one result. The page is
    scraped HTML, so a cell may not have the expected structure: cells with
    no detail link or no magnet link are skipped instead of raising
    ``IndexError`` and discarding the whole page.

    :param resp: HTTP response; only ``resp.text`` is read.
    :return: list of dicts with the keys ``url``, ``title``, ``content``,
        ``filesize``, ``magnetlink``, ``seed``, ``leech`` and ``template``.
        The list is empty when the page contains no result cells.
    """
    dom = html.fromstring(resp.text)

    results = []
    for result in dom.xpath('.//td[@class="x-item"]'):
        hrefs = result.xpath('.//a[@title]/@href')
        magnets = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')
        if not hrefs or not magnets:
            # Without both links the entry is unusable; skip it and keep
            # parsing the remaining cells.
            continue

        title = extract_text(result.xpath('.//a[@title]'))
        content = extract_text(result.xpath('.//div[@class="files"]'))

        # The size value and its multiplier sit at fixed positions in the
        # whitespace-split text of the "tail" div.
        files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()
        if len(files_data) > max(FILESIZE, FILESIZE_MULTIPLIER):
            filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER])
        else:
            # NOTE(review): None is presumably what get_torrent_size() itself
            # yields for an unparsable size and what the torrent template
            # treats as "unknown" - confirm.
            filesize = None

        results.append({'url': urljoin(URL, hrefs[0]),
                        'title': title,
                        'content': content,
                        'filesize': filesize,
                        'magnetlink': magnets[0],
                        'seed': 'N/A',
                        'leech': 'N/A',
                        'template': 'torrent.html'})

    return results
add digbt engine Unfortunately, it is quite slow so it is disabled. Furthermore, the display of number of files is wrong on digbt.org, so it is not displayed on searx. 2016-08-13 14:55:47 +02:00			`"""`
			`DigBT (Videos, Music, Files)`

			`@website https://digbt.org`
			`@provide-api no`

			`@using-api no`
			`@results HTML (using search portal)`
			`@stable no (HTML can change)`
			`@parse url, title, content, magnetlink`
			`"""`

Drop Python 2 (1/n): remove unicode string and url_utils 2020-08-06 17:42:46 +02:00			`from urllib.parse import urljoin`
add digbt engine Unfortunately, it is quite slow so it is disabled. Furthermore, the display of number of files is wrong on digbt.org, so it is not displayed on searx. 2016-08-13 14:55:47 +02:00			`from lxml import html`
[mod] move extract_text, extract_url to searx.utils 2020-10-02 18:13:56 +02:00			`from searx.utils import extract_text, get_torrent_size`
[enh] py3 compatibility 2016-11-30 18:43:03 +01:00
add digbt engine Unfortunately, it is quite slow so it is disabled. Furthermore, the display of number of files is wrong on digbt.org, so it is not displayed on searx. 2016-08-13 14:55:47 +02:00
			`categories = ['videos', 'music', 'files']`
			`paging = True`

			`URL = 'https://digbt.org'`
			`SEARCH_URL = URL + '/search/{query}-time-{pageno}'`
			`FILESIZE = 3`
			`FILESIZE_MULTIPLIER = 4`


			`def request(query, params):`
			`params['url'] = SEARCH_URL.format(query=query, pageno=params['pageno'])`

			`return params`


			`def response(resp):`
[enh] py3 compatibility 2016-11-30 18:43:03 +01:00			`dom = html.fromstring(resp.text)`
add digbt engine Unfortunately, it is quite slow so it is disabled. Furthermore, the display of number of files is wrong on digbt.org, so it is not displayed on searx. 2016-08-13 14:55:47 +02:00			`search_res = dom.xpath('.//td[@class="x-item"]')`

			`if not search_res:`
			`return list()`

			`results = list()`
			`for result in search_res:`
			`url = urljoin(URL, result.xpath('.//a[@title]/@href')[0])`
[fix] results with digbit don't truncate anymore 2016-09-20 22:35:54 +02:00			`title = extract_text(result.xpath('.//a[@title]'))`
add digbt engine Unfortunately, it is quite slow so it is disabled. Furthermore, the display of number of files is wrong on digbt.org, so it is not displayed on searx. 2016-08-13 14:55:47 +02:00			`content = extract_text(result.xpath('.//div[@class="files"]'))`
			`files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()`
			`filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER])`
			`magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0]`

			`results.append({'url': url,`
			`'title': title,`
			`'content': content,`
			`'filesize': filesize,`
			`'magnetlink': magnetlink,`
			`'seed': 'N/A',`
			`'leech': 'N/A',`
			`'template': 'torrent.html'})`

			`return results`