mirror of
https://github.com/searxng/searxng.git
synced 2024-11-05 12:50:11 +01:00
3a1c5876b1
Unfortunately, it is quite slow so it is disabled. Furthermore, the display of number of files is wrong on digbt.org, so it is not displayed on searx.
95 lines
2.7 KiB
Python
95 lines
2.7 KiB
Python
"""
|
|
BTDigg (Videos, Music, Files)
|
|
|
|
@website https://btdigg.org
|
|
@provide-api yes (on demand)
|
|
|
|
@using-api no
|
|
@results HTML (using search portal)
|
|
@stable no (HTML can change)
|
|
@parse url, title, content, seed, leech, magnetlink
|
|
"""
|
|
|
|
from urlparse import urljoin
|
|
from cgi import escape
|
|
from urllib import quote
|
|
from lxml import html
|
|
from operator import itemgetter
|
|
from searx.engines.xpath import extract_text
|
|
from searx.utils import get_torrent_size
|
|
|
|
# site address; relative result links are resolved against it
url = 'https://btdigg.org'
# search page template, filled with the quoted query and a page number
search_url = url + '/search?q={search_term}&p={pageno}'

# engine settings: result categories served, and paging support
categories = ['videos', 'music', 'files']
paging = True
|
|
|
|
|
|
# do search-request
|
|
def request(query, params):
    """Fill in the search URL for the outgoing request.

    :param query: search terms; percent-quoted before being put in the URL.
    :param params: request parameter dict; ``params['pageno']`` is read and
                   ``params['url']`` is written.
    :returns: the same ``params`` dict, with ``'url'`` set.
    """
    quoted_terms = quote(query)
    # the `p` URL parameter is sent as pageno - 1
    # (presumably the site numbers its pages from 0 -- confirm)
    page_index = params['pageno'] - 1

    params['url'] = search_url.format(search_term=quoted_terms,
                                      pageno=page_index)
    return params
|
|
|
|
|
|
# get response from search-request
|
|
def response(resp):
    """Parse a BTDigg result page into a list of torrent results.

    Every ``<tr>`` below ``//div[@id="search_res"]/table`` is treated as one
    torrent entry.  A row that lacks any of the expected pieces (title link,
    snippet, magnet link with ``href``, three ``attr_val`` spans, or a size
    of the form "<number> <unit>") is skipped, so one unexpected row no
    longer raises ``IndexError``/``KeyError`` and discards the whole page.

    :param resp: HTTP response object; only ``resp.content`` is read.
    :returns: list of result dicts for the ``torrent.html`` template, sorted
              by ``seed`` in descending order; empty if nothing was parsed.
    """
    results = []

    dom = html.fromstring(resp.content)

    # an empty node list just skips the loop, so no separate
    # "nothing found" branch is needed
    for result in dom.xpath('//div[@id="search_res"]/table/tr'):
        links = result.xpath('.//td[@class="torrent_name"]//a')
        snippets = result.xpath('.//pre[@class="snippet"]')
        magnets = result.xpath('.//td[@class="ttth"]//a')
        # evaluated once per row; used below as
        # [0] = size, [1] = number of files, [2] = seeders
        attr_vals = result.xpath('.//span[@class="attr_val"]/text()')

        # skip rows that do not look like a complete torrent entry
        if not (links and snippets and magnets) or len(attr_vals) < 3:
            continue

        size_parts = attr_vals[0].split()
        magnetlink = magnets[0].attrib.get('href')
        if len(size_parts) < 2 or magnetlink is None:
            continue

        link = links[0]
        href = urljoin(url, link.attrib.get('href'))
        title = escape(extract_text(link))

        # snippet is escaped first, then its line breaks become <br />
        content = escape(extract_text(snippets[0]))
        content = "<br />".join(content.split("\n"))

        # convert filesize to byte if possible
        filesize = get_torrent_size(size_parts[0], size_parts[1])

        # convert files to int if possible
        files = attr_vals[1]
        files = int(files) if files.isdigit() else None

        # convert seed to int if possible
        seed = attr_vals[2]
        seed = int(seed) if seed.isdigit() else 0

        # no leecher count is read from the page; always reported as 0
        leech = 0

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'seed': seed,
                        'leech': leech,
                        'filesize': filesize,
                        'files': files,
                        'magnetlink': magnetlink,
                        'template': 'torrent.html'})

    # return results sorted by seeder
    return sorted(results, key=itemgetter('seed'), reverse=True)
|