mirror of
https://github.com/searxng/searxng.git
synced 2024-11-19 02:40:11 +01:00
[enh] https rewrite basics
This commit is contained in:
parent
b44643222f
commit
96c8b20a04
14
searx/https_rewrite.py
Normal file
14
searx/https_rewrite.py
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
# Rules are modelled on the HTTPS Everywhere rule set, see
# https://gitweb.torproject.org/pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules
|
||||||
|
|
||||||
|
# HTTPS rewrite rules
|
||||||
|
# Ordered (pattern, replacement) pairs used to upgrade known http:// URLs to
# their https:// equivalents.  Every pattern is anchored at the start of the
# URL; the replacement is a template suitable for ``pattern.sub()``.
https_rules = (
    # xkcd.com / xkcd.org with an optional "www." or "m." prefix.
    # The group carries an explicit empty alternative (instead of a trailing
    # ``?``) so that it always participates in the match and ``\1`` is always
    # defined when the replacement template is expanded.
    # from
    (re.compile(r'^http://(www\.|m\.|)xkcd\.(?:com|org)/', re.I | re.U),
     # to
     r'https://\1xkcd.com/'),
    # xkcd image host ("imgs." or "sslimgs.") -> the "sslimgs." host over https
    (re.compile(r'^https?://(?:ssl)?imgs\.xkcd\.com/', re.I | re.U),
     r'https://sslimgs.xkcd.com/'),
)
|
@ -1,11 +1,12 @@
|
|||||||
server:
|
server:
|
||||||
port : 8888
|
port : 8888
|
||||||
secret_key : "ultrasecretkey" # change this!
|
secret_key : "ultrasecretkey" # change this!
|
||||||
debug : True
|
debug : False
|
||||||
request_timeout : 2.0 # seconds
|
request_timeout : 2.0 # seconds
|
||||||
base_url : False
|
base_url : False
|
||||||
themes_path : ""
|
themes_path : ""
|
||||||
default_theme : default
|
default_theme : default
|
||||||
|
https_rewrite : True
|
||||||
|
|
||||||
engines:
|
engines:
|
||||||
- name : wikipedia
|
- name : wikipedia
|
||||||
|
@ -41,13 +41,16 @@ from searx.engines import (
|
|||||||
from searx.utils import (
|
from searx.utils import (
|
||||||
UnicodeWriter, highlight_content, html_to_text, get_themes
|
UnicodeWriter, highlight_content, html_to_text, get_themes
|
||||||
)
|
)
|
||||||
|
from searx.https_rewrite import https_rules
|
||||||
from searx.languages import language_codes
|
from searx.languages import language_codes
|
||||||
from searx.search import Search
|
from searx.search import Search
|
||||||
from searx.autocomplete import backends as autocomplete_backends
|
from searx.autocomplete import backends as autocomplete_backends
|
||||||
|
|
||||||
|
|
||||||
static_path, templates_path, themes = get_themes(settings['themes_path'] if \
|
static_path, templates_path, themes =\
|
||||||
settings.get('themes_path', None) else searx_dir)
|
get_themes(settings['themes_path']
|
||||||
|
if settings.get('themes_path')
|
||||||
|
else searx_dir)
|
||||||
default_theme = settings['default_theme'] if \
|
default_theme = settings['default_theme'] if \
|
||||||
settings.get('default_theme', None) else 'default'
|
settings.get('default_theme', None) else 'default'
|
||||||
|
|
||||||
@ -192,8 +195,20 @@ def index():
|
|||||||
search.lang)
|
search.lang)
|
||||||
|
|
||||||
for result in search.results:
|
for result in search.results:
|
||||||
|
|
||||||
if not search.paging and engines[result['engine']].paging:
|
if not search.paging and engines[result['engine']].paging:
|
||||||
search.paging = True
|
search.paging = True
|
||||||
|
|
||||||
|
if settings['server']['https_rewrite']\
|
||||||
|
and result['parsed_url'].scheme == 'http':
|
||||||
|
|
||||||
|
for http_regex, https_url in https_rules:
|
||||||
|
if http_regex.match(result['url']):
|
||||||
|
result['url'] = http_regex.sub(https_url, result['url'])
|
||||||
|
# TODO: result['parsed_url'] is not updated here, so its scheme still reads 'http' after the rewrite
|
||||||
|
break
|
||||||
|
|
||||||
|
# HTTPS rewrite
|
||||||
if search.request_data.get('format', 'html') == 'html':
|
if search.request_data.get('format', 'html') == 'html':
|
||||||
if 'content' in result:
|
if 'content' in result:
|
||||||
result['content'] = highlight_content(result['content'],
|
result['content'] = highlight_content(result['content'],
|
||||||
@ -206,6 +221,7 @@ def index():
|
|||||||
# removing html content and whitespace duplications
|
# removing html content and whitespace duplications
|
||||||
result['title'] = ' '.join(html_to_text(result['title'])
|
result['title'] = ' '.join(html_to_text(result['title'])
|
||||||
.strip().split())
|
.strip().split())
|
||||||
|
|
||||||
if len(result['url']) > 74:
|
if len(result['url']) > 74:
|
||||||
url_parts = result['url'][:35], result['url'][-35:]
|
url_parts = result['url'][:35], result['url'][-35:]
|
||||||
result['pretty_url'] = u'{0}[...]{1}'.format(*url_parts)
|
result['pretty_url'] = u'{0}[...]{1}'.format(*url_parts)
|
||||||
|
Loading…
Reference in New Issue
Block a user