1
0
mirror of https://github.com/searxng/searxng.git synced 2024-11-15 09:10:12 +01:00

[perf] torrents.html, files.html: don't parse and re-format filesize

This commit is contained in:
Bnyro 2024-06-12 22:35:13 +02:00 committed by Markus Heiser
parent 16ce5612dd
commit e9f8412a6e
13 changed files with 23 additions and 86 deletions

View File

@ -6,7 +6,7 @@
from urllib.parse import quote, urljoin from urllib.parse import quote, urljoin
from lxml import html from lxml import html
from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
# about # about
about = { about = {
@ -40,9 +40,7 @@ def response(resp):
title = extract_text(eval_xpath(result, './td[contains(@class, "name")]/a[2]')) title = extract_text(eval_xpath(result, './td[contains(@class, "name")]/a[2]'))
seed = extract_text(eval_xpath(result, './/td[contains(@class, "seeds")]')) seed = extract_text(eval_xpath(result, './/td[contains(@class, "seeds")]'))
leech = extract_text(eval_xpath(result, './/td[contains(@class, "leeches")]')) leech = extract_text(eval_xpath(result, './/td[contains(@class, "leeches")]'))
filesize_info = extract_text(eval_xpath(result, './/td[contains(@class, "size")]/text()')) filesize = extract_text(eval_xpath(result, './/td[contains(@class, "size")]/text()'))
filesize, filesize_multiplier = filesize_info.split()
filesize = get_torrent_size(filesize, filesize_multiplier)
results.append( results.append(
{ {

View File

@ -36,14 +36,11 @@ Implementations
""" """
import re
from datetime import datetime from datetime import datetime
from urllib.parse import quote from urllib.parse import quote
from lxml import etree from lxml import etree
from searx.utils import get_torrent_size
# about # about
about = { about = {
"website": 'https://bt4gprx.com', "website": 'https://bt4gprx.com',
@ -103,8 +100,6 @@ def response(resp):
title = entry.find("title").text title = entry.find("title").text
link = entry.find("guid").text link = entry.find("guid").text
fullDescription = entry.find("description").text.split('<br>') fullDescription = entry.find("description").text.split('<br>')
filesize = fullDescription[1]
filesizeParsed = re.split(r"([A-Z]+)", filesize)
magnetlink = entry.find("link").text magnetlink = entry.find("link").text
pubDate = entry.find("pubDate").text pubDate = entry.find("pubDate").text
results.append( results.append(
@ -114,7 +109,7 @@ def response(resp):
'magnetlink': magnetlink, 'magnetlink': magnetlink,
'seed': 'N/A', 'seed': 'N/A',
'leech': 'N/A', 'leech': 'N/A',
'filesize': get_torrent_size(filesizeParsed[0], filesizeParsed[1]), 'filesize': fullDescription[1],
'publishedDate': datetime.strptime(pubDate, '%a,%d %b %Y %H:%M:%S %z'), 'publishedDate': datetime.strptime(pubDate, '%a,%d %b %Y %H:%M:%S %z'),
'template': 'torrent.html', 'template': 'torrent.html',
} }

View File

@ -6,7 +6,7 @@
from urllib.parse import quote, urljoin from urllib.parse import quote, urljoin
from lxml import html from lxml import html
from searx.utils import extract_text, get_torrent_size from searx.utils import extract_text
# about # about
about = { about = {
@ -58,13 +58,9 @@ def response(resp):
content = content.strip().replace('\n', ' | ') content = content.strip().replace('\n', ' | ')
content = ' '.join(content.split()) content = ' '.join(content.split())
filesize = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()[0] filesize = result.xpath('.//span[@class="torrent_size"]/text()')[0]
filesize_multiplier = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()[1]
files = (result.xpath('.//span[@class="torrent_files"]/text()') or ['1'])[0] files = (result.xpath('.//span[@class="torrent_files"]/text()') or ['1'])[0]
# convert filesize to byte if possible
filesize = get_torrent_size(filesize, filesize_multiplier)
# convert files to int if possible # convert files to int if possible
try: try:
files = int(files) files = int(files)

View File

@ -5,7 +5,7 @@
from urllib.parse import urljoin from urllib.parse import urljoin
from lxml import html from lxml import html
from searx.utils import extract_text, get_torrent_size from searx.utils import extract_text
# about # about
about = { about = {
@ -45,7 +45,7 @@ def response(resp):
title = extract_text(result.xpath('.//a[@title]')) title = extract_text(result.xpath('.//a[@title]'))
content = extract_text(result.xpath('.//div[@class="files"]')) content = extract_text(result.xpath('.//div[@class="files"]'))
files_data = extract_text(result.xpath('.//div[@class="tail"]')).split() files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()
filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER]) filesize = f"{files_data[FILESIZE]} {files_data[FILESIZE_MULTIPLIER]}"
magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0] magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0]
results.append( results.append(

View File

@ -11,7 +11,6 @@ from searx.utils import (
eval_xpath_getindex, eval_xpath_getindex,
eval_xpath_list, eval_xpath_list,
extract_text, extract_text,
get_torrent_size,
int_or_zero, int_or_zero,
) )
@ -54,7 +53,7 @@ def response(resp):
result['content'] = extract_text(eval_xpath(tag, './/span[@class="font11px lightgrey block"]')) result['content'] = extract_text(eval_xpath(tag, './/span[@class="font11px lightgrey block"]'))
result['seed'] = int_or_zero(extract_text(eval_xpath(tag, './/td[contains(@class, "green")]'))) result['seed'] = int_or_zero(extract_text(eval_xpath(tag, './/td[contains(@class, "green")]')))
result['leech'] = int_or_zero(extract_text(eval_xpath(tag, './/td[contains(@class, "red")]'))) result['leech'] = int_or_zero(extract_text(eval_xpath(tag, './/td[contains(@class, "red")]')))
result['filesize'] = get_torrent_size(*extract_text(eval_xpath(tag, './/td[contains(@class, "nobr")]')).split()) result['filesize'] = extract_text(eval_xpath(tag, './/td[contains(@class, "nobr")]'))
results.append(result) results.append(result)

View File

@ -9,7 +9,6 @@ from lxml import html
from searx.utils import ( from searx.utils import (
eval_xpath_getindex, eval_xpath_getindex,
extract_text, extract_text,
get_torrent_size,
int_or_zero, int_or_zero,
) )
@ -99,11 +98,7 @@ def response(resp):
# let's try to calculate the torrent size # let's try to calculate the torrent size
filesize = None filesize = eval_xpath_getindex(result, xpath_filesize, 0, '')
filesize_info = eval_xpath_getindex(result, xpath_filesize, 0, '')
if filesize_info:
filesize_info = result.xpath(xpath_filesize)[0]
filesize = get_torrent_size(*filesize_info.split())
# content string contains all information not included into template # content string contains all information not included into template
content = 'Category: "{category}". Downloaded {downloads} times.' content = 'Category: "{category}". Downloaded {downloads} times.'

View File

@ -8,7 +8,7 @@ from datetime import datetime
from operator import itemgetter from operator import itemgetter
from urllib.parse import quote from urllib.parse import quote
from searx.utils import get_torrent_size from searx.utils import humanize_bytes
# about # about
about = { about = {
@ -80,17 +80,12 @@ def response(resp):
# extract and convert creation date # extract and convert creation date
try: try:
date = datetime.fromtimestamp(float(result["added"])) params['publishedDate'] = datetime.fromtimestamp(float(result["added"]))
params['publishedDate'] = date
except: # pylint: disable=bare-except except: # pylint: disable=bare-except
pass pass
# let's try to calculate the torrent size # let's try to calculate the torrent size
try: params['filesize'] = humanize_bytes(int(result["size"]))
filesize = get_torrent_size(result["size"], "B")
params['filesize'] = filesize
except: # pylint: disable=bare-except
pass
# append result # append result
results.append(params) results.append(params)

View File

@ -14,7 +14,6 @@ from searx.utils import (
eval_xpath, eval_xpath,
eval_xpath_getindex, eval_xpath_getindex,
eval_xpath_list, eval_xpath_list,
get_torrent_size,
) )
about = { about = {
@ -63,7 +62,7 @@ def response(resp):
'leech': extract_text(stats[2]), 'leech': extract_text(stats[2]),
'title': extract_text(title), 'title': extract_text(title),
'url': resp.search_params['base_url'] + url, 'url': resp.search_params['base_url'] + url,
'filesize': get_torrent_size(*extract_text(stats[1]).split()), 'filesize': extract_text(stats[1]),
'magnetlink': magnet, 'magnetlink': magnet,
'torrentfile': torrentfile, 'torrentfile': torrentfile,
'metadata': extract_text(categ), 'metadata': extract_text(categ),

View File

@ -8,7 +8,7 @@ from datetime import datetime
from urllib.parse import urlencode from urllib.parse import urlencode
from lxml import html from lxml import html
from searx.utils import extract_text, get_torrent_size, int_or_zero from searx.utils import extract_text, int_or_zero
# about # about
about = { about = {
@ -49,7 +49,7 @@ def response(resp):
return [] return []
# regular expression for parsing torrent size strings # regular expression for parsing torrent size strings
size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE) size_re = re.compile(r'[\d.]+(T|G|M)?B', re.IGNORECASE)
# processing the results, two rows at a time # processing the results, two rows at a time
for i in range(0, len(rows), 2): for i in range(0, len(rows), 2):
@ -73,9 +73,7 @@ def response(resp):
item = item.strip() item = item.strip()
if item.startswith('Size:'): if item.startswith('Size:'):
try: try:
# ('1.228', 'GB') params['filesize'] = size_re.search(item).group()
groups = size_re.match(item).groups()
params['filesize'] = get_torrent_size(groups[0], groups[1])
except: # pylint: disable=bare-except except: # pylint: disable=bare-except
pass pass
elif item.startswith('Date:'): elif item.startswith('Date:'):

View File

@ -56,6 +56,7 @@ from urllib.parse import quote
from lxml import etree # type: ignore from lxml import etree # type: ignore
from searx.exceptions import SearxEngineAPIException from searx.exceptions import SearxEngineAPIException
from searx.utils import humanize_bytes
if TYPE_CHECKING: if TYPE_CHECKING:
import httpx import httpx
@ -137,11 +138,9 @@ def build_result(item: etree.Element) -> Dict[str, Any]:
if enclosure is not None: if enclosure is not None:
enclosure_url = enclosure.get('url') enclosure_url = enclosure.get('url')
size = get_attribute(item, 'size') filesize = get_attribute(item, 'size')
if not size and enclosure: if not filesize and enclosure:
size = enclosure.get('length') filesize = enclosure.get('length')
if size:
size = int(size)
guid = get_attribute(item, 'guid') guid = get_attribute(item, 'guid')
comments = get_attribute(item, 'comments') comments = get_attribute(item, 'comments')
@ -154,7 +153,7 @@ def build_result(item: etree.Element) -> Dict[str, Any]:
result: Dict[str, Any] = { result: Dict[str, Any] = {
'template': 'torrent.html', 'template': 'torrent.html',
'title': get_attribute(item, 'title'), 'title': get_attribute(item, 'title'),
'filesize': size, 'filesize': humanize_bytes(int(filesize)) if filesize else None,
'files': get_attribute(item, 'files'), 'files': get_attribute(item, 'files'),
'seed': seeders, 'seed': seeders,
'leech': _map_leechers(leechers, seeders, peers), 'leech': _map_leechers(leechers, seeders, peers),

View File

@ -35,14 +35,7 @@
{%- if result.filename %}<tr><td>{{ _('Filename') }}</td><td>{{ result.filename|safe }}</td></tr>{% endif -%} {%- if result.filename %}<tr><td>{{ _('Filename') }}</td><td>{{ result.filename|safe }}</td></tr>{% endif -%}
{%- if result.size %}<tr><td>{{ _('Filesize') }}</td><td> {%- if result.size %}<tr><td>{{ _('Filesize') }}</td><td>{{ result.size|safe }}</td></tr>{%- endif -%}
{%- if result.size < 1024 %}{{ result.size }} {{ _('Bytes') -}}
{%- elif result.size < 1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024) }} {{ _('kiB') -}}
{%- elif result.size < 1024*1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024/1024) }} {{ _('MiB') -}}
{%- elif result.size < 1024*1024*1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024/1024/1024) }} {{ _('GiB') -}}
{%- else %}{{ '{0:0.2f}'.format(result.size/1024/1024/1024/1024) }} {{ _('TiB') }}{% endif -%}
</td></tr>
{%- endif -%}
{%- if result.time %}<tr><td>{{ _('Date') }}</td><td>{{ result.time|safe }}</td></tr>{% endif -%} {%- if result.time %}<tr><td>{{ _('Date') }}</td><td>{{ result.time|safe }}</td></tr>{% endif -%}

View File

@ -8,14 +8,7 @@
{% if result.seed is defined %}<p class="stat"> &bull; {{ icon_big('arrow-swap') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> &bull; {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span></p>{% endif %} {% if result.seed is defined %}<p class="stat"> &bull; {{ icon_big('arrow-swap') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> &bull; {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span></p>{% endif %}
{%- if result.filesize %}<p class="stat">{{ icon_big('floppy-disk') }} {{ _('Filesize') }}<span class="badge"> {%- if result.filesize %}<p class="stat">{{ icon_big('floppy-disk') }} {{ _('Filesize') }}<span class="badge">{{ result.filesize }}</span></p>{%- endif -%}
{%- if result.filesize < 1024 %}{{ result.filesize }} {{ _('Bytes') }}
{%- elif result.filesize < 1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024) }} {{ _('kiB') }}
{%- elif result.filesize < 1024*1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024/1024) }} {{ _('MiB') }}
{%- elif result.filesize < 1024*1024*1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024/1024/1024) }} {{ _('GiB') }}
{%- else %}{{ '{0:0.2f}'.format(result.filesize/1024/1024/1024/1024) }} {{ _('TiB') }}{% endif -%}
</span></p>
{%- endif -%}
{%- if result.files %}<p class="stat">{{ icon_big('file') }} {{ _('Number of Files') }} <span class="badge">{{ result.files }}</span></p>{% endif -%} {%- if result.files %}<p class="stat">{{ icon_big('file') }} {{ _('Number of Files') }} <span class="badge">{{ result.files }}</span></p>{% endif -%}

View File

@ -332,29 +332,6 @@ def dict_subset(dictionary: MutableMapping, properties: Set[str]) -> Dict:
return {k: dictionary[k] for k in properties if k in dictionary} return {k: dictionary[k] for k in properties if k in dictionary}
def get_torrent_size(filesize: str, filesize_multiplier: str) -> Optional[int]:
    """Convert a textual size and its unit into a number of bytes.

    Args:
        * filesize (str): numeric part of the size, e.g. ``'5'`` or ``'3.14'``
        * filesize_multiplier (str): unit label (TB, GB, .... TiB, GiB...);
          it is looked up in ``_STORAGE_UNIT_VALUE`` and an unknown label
          falls back to a multiplier of 1

    Returns:
        * int: number of bytes, or ``None`` when ``filesize`` cannot be
          converted to a finite number

    Example (carried over from the previous docstring; the exact values depend
    on the ``_STORAGE_UNIT_VALUE`` table defined elsewhere in this module):
        >>> get_torrent_size('5', 'GB')
        5368709120
        >>> get_torrent_size('3.14', 'MiB')
        3140000
    """
    multiplier = _STORAGE_UNIT_VALUE.get(filesize_multiplier, 1)
    try:
        return int(float(filesize) * multiplier)
    except (ValueError, OverflowError):
        # ValueError: non-numeric text (or NaN, which int() rejects).
        # OverflowError: int() of an infinite float, e.g. filesize 'inf' or
        # '1e999' coming from a scraped page.
        return None
def humanize_bytes(size, precision=2): def humanize_bytes(size, precision=2):
"""Determine the *human readable* value of bytes on 1024 base (1KB=1024B).""" """Determine the *human readable* value of bytes on 1024 base (1KB=1024B)."""
s = ['B ', 'KB', 'MB', 'GB', 'TB'] s = ['B ', 'KB', 'MB', 'GB', 'TB']