1
0
mirror of https://github.com/searxng/searxng.git synced 2024-11-22 12:10:11 +01:00

[mod] ahmia_filter.py: minor changes

- use result['parsed_url']
- load ahmia_blacklist.txt in searx.data
This commit is contained in:
Alexandre Flament 2020-10-26 20:40:24 +01:00
parent db703a0283
commit 5e7060053c
2 changed files with 10 additions and 8 deletions

View File

@ -2,7 +2,7 @@ import json
from pathlib import Path from pathlib import Path
__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader'] __init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader', 'ahmia_blacklist_loader']
data_dir = Path(__file__).parent data_dir = Path(__file__).parent
@ -16,6 +16,11 @@ def bangs_loader():
return load('bangs.json') return load('bangs.json')
def ahmia_blacklist_loader():
    """Return the entries of ``ahmia_blacklist.txt`` as a list of strings.

    The file is looked up in ``data_dir``, read as UTF-8 and split on any
    whitespace, so every whitespace-separated token becomes one list item.
    """
    blacklist_path = str(data_dir / 'ahmia_blacklist.txt')
    with open(blacklist_path, encoding='utf-8') as blacklist_file:
        content = blacklist_file.read()
    return content.split()
ENGINES_LANGUAGES = load('engines_languages.json') ENGINES_LANGUAGES = load('engines_languages.json')
CURRENCIES = load('currencies.json') CURRENCIES = load('currencies.json')
USER_AGENTS = load('useragents.json') USER_AGENTS = load('useragents.json')

View File

@ -3,9 +3,7 @@
''' '''
from hashlib import md5 from hashlib import md5
from os.path import join from searx.data import ahmia_blacklist_loader
from urllib.parse import urlparse
from searx import searx_dir
name = "Ahmia blacklist" name = "Ahmia blacklist"
description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)" description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"
@ -18,15 +16,14 @@ ahmia_blacklist = None
def get_ahmia_blacklist():
    """Return the Ahmia blacklist, loading it on first use.

    The value returned by ``ahmia_blacklist_loader()`` is stored in the
    module-level ``ahmia_blacklist`` variable and reused by later calls.
    """
    global ahmia_blacklist
    # Test against None instead of truthiness: an empty blacklist is a valid
    # cached value and must not trigger a reload on every call.
    if ahmia_blacklist is None:
        ahmia_blacklist = ahmia_blacklist_loader()
    return ahmia_blacklist
def not_blacklisted(result):
    """Return True when *result* should be kept, False when it is blacklisted.

    Only results flagged ``is_onion`` that carry a ``parsed_url`` are checked;
    everything else is kept. For checked results the MD5 hex digest of the
    URL's hostname is looked up in the list returned by
    ``get_ahmia_blacklist()``.
    """
    parsed_url = result.get('parsed_url')
    if not result.get('is_onion') or not parsed_url:
        return True

    hostname = parsed_url.hostname
    if not hostname:
        # A parsed URL without a network location has hostname None; there is
        # nothing to hash, so keep the result instead of raising.
        return True

    result_hash = md5(hostname.encode()).hexdigest()
    return result_hash not in get_ahmia_blacklist()