1
0
mirror of https://github.com/searxng/searxng.git synced 2024-11-16 17:40:11 +01:00
searxng/searx/engines/generalfile.py

61 lines
1.4 KiB
Python
Raw Normal View History

## General Files (Files)
#
# @website http://www.general-files.org
# @provide-api no (nothing found)
#
# @using-api no (because nothing found)
# @results HTML (using search portal)
# @stable no (HTML can change)
# @parse url, title, content
#
# @todo detect torrents?
2014-06-27 17:25:16 +02:00
from urllib.parse import quote

from lxml import html
# engine dependent config
categories = ['files']  # result category this engine feeds
paging = True           # the portal supports page numbers in the URL

# search-url
base_url = 'http://www.general-file.com'
# {letter} is the first character of the query -- the site appears to
# shard its file listings by leading letter (see request() below).
search_url = base_url + '/files-{letter}/{query}/{pageno}'

# specific xpath variables: one table per result row, link + snippet inside
result_xpath = '//table[@class="block-file"]'
title_xpath = './/h2/a//text()'
url_xpath = './/h2/a/@href'
content_xpath = './/p//text()'
# do search-request
def request(query, params):
    """Fill in ``params['url']`` for the engine's search request.

    :param query: raw search term; its first character selects the
        site's per-letter listing shard.  Assumes a non-empty query —
        the searx core does not dispatch empty queries (TODO confirm).
    :param params: request dict from searx; ``params['pageno']`` is read.
    :returns: the (mutated) ``params`` dict, per engine convention.
    """
    # Percent-quote the query so spaces and special characters do not
    # produce an invalid / ambiguous URL path segment.
    params['url'] = search_url.format(query=quote(query),
                                      letter=quote(query[0]),
                                      pageno=params['pageno'])

    return params
# get response from search-request
def response(resp):
    """Parse the HTML search-portal page into searx results.

    :param resp: HTTP response object; only ``resp.text`` is read.
    :returns: list of dicts with ``url``, ``title`` and ``content`` keys.
    """
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(result_xpath):
        # a row without a link cannot be a usable result -- skip it
        # instead of raising IndexError on [0]
        links = result.xpath(url_xpath)
        if not links:
            continue
        url = links[0]

        # skip fast download links: only site-relative paths are real
        # file pages (absolute URLs point elsewhere)
        if not url.startswith('/'):
            continue

        # append result
        results.append({'url': base_url + url,
                        'title': ''.join(result.xpath(title_xpath)),
                        'content': ''.join(result.xpath(content_xpath))})

    # return results
    return results