From 15a4c10c990bb8d99e29a30d8d52638d3e3d8fe8 Mon Sep 17 00:00:00 2001 From: Cqoicebordel Date: Tue, 9 Dec 2014 19:19:39 +0100 Subject: [PATCH] First pass at Kickass Engine Parse and return results correctly. Pages numbers taken care of. Not done, and maybe to do : - 'content' : I don't know what it could be. Maybe votes ? - 'categories' : the results are not filtered by categories, because I don't see how to do it properly : there are too much categories on Kickass. Is 'video' only movies, or also tv show or porn ? So for now, the category is 'all'. - Favicon/icon : may be a good idea. --- searx/engines/kickass.py | 83 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 searx/engines/kickass.py diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py new file mode 100644 index 000000000..640a18a56 --- /dev/null +++ b/searx/engines/kickass.py @@ -0,0 +1,83 @@ +## Kickass Torrent (Videos, Music, Files) +# +# @website https://kickass.so +# @provide-api no (nothing found) +# +# @using-api no +# @results HTML (using search portal) +# @stable yes (HTML can change) +# @parse url, title, content, seed, leech, magnetlink + +from urlparse import urljoin +from cgi import escape +from urllib import quote +from lxml import html +from operator import itemgetter + +# engine dependent config +categories = ['videos', 'music', 'files'] +paging = True + +# search-url +url = 'https://kickass.so/' +search_url = url + 'search/{search_term}/{pageno}/' + +# specific xpath variables +magnet_xpath = './/a[@title="Torrent magnet link"]' +#content_xpath = './/font[@class="detDesc"]//text()' + + +# do search-request +def request(query, params): + params['url'] = search_url.format(search_term=quote(query), + pageno=params['pageno']) + + return params + + +# get response from search-request +def response(resp): + results = [] + + dom = html.fromstring(resp.text) + + search_res = dom.xpath('//table[@class="data"]//tr') + + # return empty array if nothing is found + if not search_res: + return [] + + # parse results + for result in search_res[1:]: + link = result.xpath('.//a[@class="cellMainLink"]')[0] + href = urljoin(url, link.attrib['href']) + title = ' '.join(link.xpath('.//text()')) + #content = escape(' '.join(result.xpath(content_xpath))) + seed = result.xpath('.//td[contains(@class, "green")]/text()')[0] + leech = result.xpath('.//td[contains(@class, "red")]/text()')[0] + + # convert seed to int if possible + if seed.isdigit(): + seed = int(seed) + else: + seed = 0 + + # convert leech to int if possible + if leech.isdigit(): + leech = int(leech) + else: + leech = 0 + + magnetlink = result.xpath(magnet_xpath)[0].attrib['href'] + + # append result + results.append({'url': href, + 'title': title, + 'content': '', + 'seed': seed, + 'leech': leech, + 'magnetlink': magnetlink, + 'template': 'torrent.html'}) + + # return results sorted by seeder + return sorted(results, key=itemgetter('seed'), reverse=True)