searxng/searx/engines/not_evil.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 not Evil (Onions)
"""

from urllib.parse import urlencode
from lxml import html
from searx.engines.xpath import extract_text

# about: engine metadata (website, API documentation, result format)
about = dict(
    website='http://hss3uro2hsxfogfq.onion',
    wikidata_id=None,
    official_api_documentation='http://hss3uro2hsxfogfq.onion/api.htm',
    use_official_api=False,
    require_api_key=False,
    results='HTML',
)

# engine dependent config
categories = ["onions"]
paging = True
# number of results requested per page (sent as ``numRows`` and used by
# request() to compute the ``start`` offset)
page_size = 20

# search-url: ``search_url`` is a template appended to ``base_url``; its
# placeholders are filled in by request()
base_url = "http://hss3uro2hsxfogfq.onion/"
search_url = (
    "index.php?{query}"
    "&hostLimit=20"
    "&start={pageno}"
    "&numRows={page_size}"
)

# specific xpath variables
# one node per search result
results_xpath = "//*[@id=\"content\"]/div/p"
# the following expressions are evaluated relative to a single result node
url_xpath = "./span[1]"
title_xpath = "./a[1]"
content_xpath = "./text()"


# do search-request
def request(query, params):
    """Store the search URL for ``query`` in ``params['url']``.

    The ``start`` URL parameter is the zero-based offset of the first result
    of the requested page, i.e. ``(params['pageno'] - 1) * page_size``.

    :param query: search terms; URL-encoded as the ``q`` parameter
    :param params: request parameters; ``params['pageno']`` is read and
        ``params['url']`` is written
    :return: the same ``params`` mapping, updated in place
    """
    start = page_size * (params['pageno'] - 1)
    encoded_query = urlencode({'q': query})

    path = search_url.format(
        query=encoded_query,
        pageno=start,
        page_size=page_size,
    )
    params['url'] = base_url + path

    return params


# get response from search-request
def response(resp):
    """Parse a result page into a list of result dicts.

    Every node matched by ``results_xpath`` is treated as one search result.
    Its URL is taken from the first node matched by ``url_xpath``, its title
    from the first node matched by ``title_xpath`` and its content from the
    nodes matched by ``content_xpath``.

    Nodes that have no URL or no title element are skipped, so a single
    non-result paragraph does not raise ``IndexError`` and discard every
    other result of the page.

    :param resp: HTTP response; its ``encoding`` attribute is overwritten and
        its ``text`` attribute is parsed as HTML
    :return: list of dicts with the keys ``url``, ``title``, ``content`` and
        ``is_onion`` (always ``True``)
    """
    results = []

    # needed because otherwise requests guesses wrong encoding
    resp.encoding = 'utf8'
    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        url_nodes = result.xpath(url_xpath)
        title_nodes = result.xpath(title_xpath)

        # xpath() returns an empty list when nothing matches; indexing it
        # with [0] would abort the parsing of the whole page
        if not url_nodes or not title_nodes:
            continue

        # append result
        results.append({'url': extract_text(url_nodes[0]),
                        'title': extract_text(title_nodes[0]),
                        'content': extract_text(result.xpath(content_xpath)),
                        'is_onion': True})

    return results
[enh] engines: add about variable move meta information from comment to the about variable so the preferences and the documentation can show this information 2021-01-13 11:31:25 +01:00			`# SPDX-License-Identifier: AGPL-3.0-or-later`
[enh] Add onions category with Ahmia, Not Evil and Torch Xpath engine and results template changed to account for the fact that archive.org doesn't cache .onions, though some onion engines might have their own cache. Disabled by default. Can be enabled by setting the SOCKS proxies to wherever Tor is listening and setting using_tor_proxy as True. Requires Tor and updating packages. To avoid manually adding the timeout on each engine, you can set extra_proxy_timeout to account for Tor's (or whatever proxy used) extra time. 2016-05-19 07:38:43 +02:00			`"""`
			`not Evil (Onions)`
			`"""`

			`from urllib.parse import urlencode`
			`from lxml import html`
			`from searx.engines.xpath import extract_text`

[enh] engines: add about variable move meta information from comment to the about variable so the preferences and the documentation can show this information 2021-01-13 11:31:25 +01:00			`# about`
			`about = {`
			`"website": 'http://hss3uro2hsxfogfq.onion',`
			`"wikidata_id": None,`
			`"official_api_documentation": 'http://hss3uro2hsxfogfq.onion/api.htm',`
			`"use_official_api": False,`
			`"require_api_key": False,`
			`"results": 'HTML',`
			`}`

[enh] Add onions category with Ahmia, Not Evil and Torch Xpath engine and results template changed to account for the fact that archive.org doesn't cache .onions, though some onion engines might have their own cache. Disabled by default. Can be enabled by setting the SOCKS proxies to wherever Tor is listening and setting using_tor_proxy as True. Requires Tor and updating packages. To avoid manually adding the timeout on each engine, you can set extra_proxy_timeout to account for Tor's (or whatever proxy used) extra time. 2016-05-19 07:38:43 +02:00			`# engine dependent config`
			`categories = ['onions']`
			`paging = True`
			`page_size = 20`

			`# search-url`
			`base_url = 'http://hss3uro2hsxfogfq.onion/'`
			`search_url = 'index.php?{query}&hostLimit=20&start={pageno}&numRows={page_size}'`

			`# specific xpath variables`
			`results_xpath = '//*[@id="content"]/div/p'`
			`url_xpath = './span[1]'`
			`title_xpath = './a[1]'`
			`content_xpath = './text()'`


			`# do search-request`
			`def request(query, params):`
			`offset = (params['pageno'] - 1) * page_size`

			`params['url'] = base_url + search_url.format(pageno=offset,`
			`query=urlencode({'q': query}),`
			`page_size=page_size)`

			`return params`


			`# get response from search-request`
			`def response(resp):`
			`results = []`

			`# needed because otherwise requests guesses wrong encoding`
			`resp.encoding = 'utf8'`
			`dom = html.fromstring(resp.text)`

			`# parse results`
			`for result in dom.xpath(results_xpath):`
			`url = extract_text(result.xpath(url_xpath)[0])`
			`title = extract_text(result.xpath(title_xpath)[0])`
			`content = extract_text(result.xpath(content_xpath))`

			`# append result`
			`results.append({'url': url,`
			`'title': title,`
			`'content': content,`
			`'is_onion': True})`

			`return results`