1
0
mirror of https://github.com/searxng/searxng.git synced 2024-11-22 12:10:11 +01:00

add Ahmia filter plugin for onion results

This commit is contained in:
Marc Abonce Seguin 2020-10-18 23:55:57 -07:00
parent c3daa08537
commit 32957cdf49
5 changed files with 16253 additions and 0 deletions

16177
searx/data/ahmia_blacklist.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@ -28,6 +28,7 @@ from searx import logger, settings, static_path
logger = logger.getChild('plugins') logger = logger.getChild('plugins')
from searx.plugins import (oa_doi_rewrite, from searx.plugins import (oa_doi_rewrite,
ahmia_filter,
hash_plugin, hash_plugin,
https_rewrite, https_rewrite,
infinite_scroll, infinite_scroll,
@ -181,3 +182,7 @@ if 'enabled_plugins' in settings:
plugin.default_on = True plugin.default_on = True
else: else:
plugin.default_on = False plugin.default_on = False
# load tor specific plugins
if settings['outgoing'].get('using_tor_proxy'):
plugins.register(ahmia_filter)

View File

@ -0,0 +1,36 @@
'''
SPDX-License-Identifier: AGPL-3.0-or-later
'''
from hashlib import md5
from os.path import join
from urllib.parse import urlparse
from searx import searx_dir
# Plugin metadata: display name and description of this plugin.
name = "Ahmia blacklist"
description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"
# The plugin is switched on by default.
default_on = True
# Section tag for this plugin; presumably used by the preferences UI to group
# onion-related plugins -- confirm against the templates.
preference_section = 'onions'
# Module-level cache of the blacklist entries; stays None until
# get_ahmia_blacklist() loads the data file.
ahmia_blacklist = None
def get_ahmia_blacklist():
    """Return the Ahmia blacklist entries, loading them from disk on first use.

    The entries are the whitespace-separated tokens of
    ``data/ahmia_blacklist.txt`` under ``searx_dir``. They are cached in the
    module-level ``ahmia_blacklist`` variable, so the file is read at most
    once per process.

    Returns:
        set: the blacklist entries. A set keeps the membership test done for
        every onion result constant-time, however long the blacklist is.
    """
    global ahmia_blacklist
    # Compare against None rather than truthiness so that an empty blacklist
    # file is cached as well instead of being re-read on every call.
    if ahmia_blacklist is None:
        blacklist_path = join(searx_dir, "data", "ahmia_blacklist.txt")
        with open(blacklist_path, 'r', encoding='utf-8') as f:
            ahmia_blacklist = set(f.read().split())
    return ahmia_blacklist
def not_blacklisted(result):
    """Tell whether a search result may be kept.

    Non-onion results are always kept. For onion results, the MD5 hex digest
    of the URL's hostname is looked up in the blacklist returned by
    ``get_ahmia_blacklist()``.

    Args:
        result: mapping with an optional ``is_onion`` flag and a ``url`` string.

    Returns:
        bool: ``False`` only if the hash of the hostname is blacklisted.
    """
    if not result.get('is_onion'):
        return True
    url = result.get('url')
    hostname = urlparse(url).hostname if url else None
    if not hostname:
        # Nothing to hash (missing URL or URL without a network location), so
        # the result cannot match a blacklist entry. Keep it instead of
        # failing with AttributeError on ``None.encode()``.
        return True
    result_hash = md5(hostname.encode()).hexdigest()
    return result_hash not in get_ahmia_blacklist()
def post_search(request, search):
    """Drop blacklisted onion results from the search's merged results.

    Replaces ``search.result_container._merged_results`` with a new list that
    holds only the entries accepted by ``not_blacklisted``. ``request`` is not
    used. Always returns ``True``.
    """
    kept = [entry for entry in search.result_container._merged_results if not_blacklisted(entry)]
    search.result_container._merged_results = kept
    return True

View File

@ -258,6 +258,7 @@
<fieldset> <fieldset>
<div class="container-fluid"> <div class="container-fluid">
{% for plugin in plugins %} {% for plugin in plugins %}
{% if plugin.preference_section != 'onions' %}
<div class="panel panel-default"> <div class="panel panel-default">
<div class="panel-heading"> <div class="panel-heading">
<h3 class="panel-title">{{ _(plugin.name) }}</h3> <h3 class="panel-title">{{ _(plugin.name) }}</h3>
@ -271,6 +272,7 @@
</div> </div>
</div> </div>
</div> </div>
{% endif %}
{% endfor %} {% endfor %}
</div> </div>
</fieldset> </fieldset>

33
utils/fetch_ahmia_blacklist.py Executable file
View File

@ -0,0 +1,33 @@
#!/usr/bin/env python
# This script saves Ahmia's blacklist for onion sites.
# More info at https://ahmia.fi/blacklist/
# set path
from sys import path
from os.path import realpath, dirname, join
path.append(realpath(dirname(realpath(__file__)) + '/../'))
#
import requests
from searx import searx_dir
# Address requested by fetch_ahmia_blacklist() to download the blacklist.
URL = 'https://ahmia.fi/blacklist/banned/'
def fetch_ahmia_blacklist():
    """Download Ahmia's blacklist and return its entries.

    Sends a GET request to ``URL`` with a 3 second timeout.

    Returns:
        list: the whitespace-separated tokens of the response body.

    Raises:
        Exception: if the server answers with a status code other than 200.
    """
    resp = requests.get(URL, timeout=3.0)
    if resp.status_code != 200:
        # status_code is an int: format it instead of concatenating it to the
        # message, which raised TypeError and hid the actual HTTP error.
        raise Exception("Error fetching Ahmia blacklist, HTTP code {}".format(resp.status_code))
    return resp.text.split()
def get_ahmia_blacklist_filename():
    """Return the path of ``ahmia_blacklist.txt`` in the ``data`` directory under ``searx_dir``."""
    data_dir = join(searx_dir, "data")
    return join(data_dir, "ahmia_blacklist.txt")
# Run the download only when executed as a script, so that importing this
# module does not trigger a network request or overwrite the data file.
if __name__ == '__main__':
    blacklist = fetch_ahmia_blacklist()
    # One entry per line; the data file is overwritten with the fresh list.
    with open(get_ahmia_blacklist_filename(), "w") as f:
        f.write('\n'.join(blacklist))