mirror of https://github.com/searxng/searxng.git (synced 2024-11-15 09:10:12 +01:00)
[perf] torrents.html, files.html: don't parse and re-format filesize
commit e9f8412a6e (parent 16ce5612dd)
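Summary of the change (a sketch, not part of the diff): the torrent engines used to split an already human-readable size such as "1.23 GB" into a number and a unit, convert it to an integer byte count with get_torrent_size(), and hand that integer to the torrent.html / files.html templates, which immediately re-formatted it back into a human-readable string. After this commit the engines pass the display string through untouched, and the one caller that really starts from raw bytes formats them once with humanize_bytes(). A minimal before/after sketch, assuming a searxng checkout at this commit; the sample values are made up:

    from searx.utils import humanize_bytes   # get_torrent_size() is removed by this commit

    # Before: split "1.23 GB" into ("1.23", "GB"), convert to bytes with
    # get_torrent_size(), then let the template re-format the integer for display.

    # After: engines that scrape an already formatted size keep the string as-is ...
    result = {'filesize': '1.23 GB'}

    # ... and engines whose source returns a raw byte count format it exactly once:
    result = {'filesize': humanize_bytes(1321302474)}   # something like "1.23 GB"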
@@ -6,7 +6,7 @@

 from urllib.parse import quote, urljoin
 from lxml import html
-from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex
+from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex

 # about
 about = {
@@ -40,9 +40,7 @@ def response(resp):
         title = extract_text(eval_xpath(result, './td[contains(@class, "name")]/a[2]'))
         seed = extract_text(eval_xpath(result, './/td[contains(@class, "seeds")]'))
         leech = extract_text(eval_xpath(result, './/td[contains(@class, "leeches")]'))
-        filesize_info = extract_text(eval_xpath(result, './/td[contains(@class, "size")]/text()'))
-        filesize, filesize_multiplier = filesize_info.split()
-        filesize = get_torrent_size(filesize, filesize_multiplier)
+        filesize = extract_text(eval_xpath(result, './/td[contains(@class, "size")]/text()'))

         results.append(
             {
@@ -36,14 +36,11 @@ Implementations

 """

-import re
 from datetime import datetime
 from urllib.parse import quote

 from lxml import etree

-from searx.utils import get_torrent_size
-
 # about
 about = {
     "website": 'https://bt4gprx.com',
@@ -103,8 +100,6 @@ def response(resp):
         title = entry.find("title").text
         link = entry.find("guid").text
         fullDescription = entry.find("description").text.split('<br>')
-        filesize = fullDescription[1]
-        filesizeParsed = re.split(r"([A-Z]+)", filesize)
         magnetlink = entry.find("link").text
         pubDate = entry.find("pubDate").text
         results.append(
@@ -114,7 +109,7 @@ def response(resp):
                 'magnetlink': magnetlink,
                 'seed': 'N/A',
                 'leech': 'N/A',
-                'filesize': get_torrent_size(filesizeParsed[0], filesizeParsed[1]),
+                'filesize': fullDescription[1],
                 'publishedDate': datetime.strptime(pubDate, '%a,%d %b %Y %H:%M:%S %z'),
                 'template': 'torrent.html',
             }
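Aside (not part of the diff): the size no longer goes through re.split() and get_torrent_size(); the second <br>-separated fragment of the RSS description is already the formatted size and is used verbatim. A toy illustration with a made-up description value, since the exact feed markup is an assumption here:

    description = "3 files<br>1.23GB<br>..."      # hypothetical bt4g RSS description text
    fullDescription = description.split('<br>')
    filesize = fullDescription[1]                 # "1.23GB" -- no byte conversion anymore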
@@ -6,7 +6,7 @@
 from urllib.parse import quote, urljoin

 from lxml import html
-from searx.utils import extract_text, get_torrent_size
+from searx.utils import extract_text

 # about
 about = {
@@ -58,13 +58,9 @@ def response(resp):
         content = content.strip().replace('\n', ' | ')
         content = ' '.join(content.split())

-        filesize = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()[0]
-        filesize_multiplier = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()[1]
+        filesize = result.xpath('.//span[@class="torrent_size"]/text()')[0]
         files = (result.xpath('.//span[@class="torrent_files"]/text()') or ['1'])[0]

-        # convert filesize to byte if possible
-        filesize = get_torrent_size(filesize, filesize_multiplier)
-
         # convert files to int if possible
         try:
             files = int(files)
@@ -5,7 +5,7 @@

 from urllib.parse import urljoin
 from lxml import html
-from searx.utils import extract_text, get_torrent_size
+from searx.utils import extract_text

 # about
 about = {
@@ -45,7 +45,7 @@ def response(resp):
         title = extract_text(result.xpath('.//a[@title]'))
         content = extract_text(result.xpath('.//div[@class="files"]'))
         files_data = extract_text(result.xpath('.//div[@class="tail"]')).split()
-        filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER])
+        filesize = f"{files_data[FILESIZE]} {files_data[FILESIZE_MULTIPLIER]}"
         magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0]

         results.append(
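For context (not part of the diff): instead of converting the two tokens to a byte count, the value and unit are simply re-joined for display. A toy illustration; both the sample tail text and the indices are made up, the engine defines the real FILESIZE / FILESIZE_MULTIPLIER constants elsewhere:

    FILESIZE, FILESIZE_MULTIPLIER = 7, 8   # hypothetical token indices
    files_data = "Seeders: 4 Leechers: 2 Completed: 9 Size: 1.23 GB".split()
    filesize = f"{files_data[FILESIZE]} {files_data[FILESIZE_MULTIPLIER]}"   # -> "1.23 GB"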
@@ -11,7 +11,6 @@ from searx.utils import (
     eval_xpath_getindex,
     eval_xpath_list,
     extract_text,
-    get_torrent_size,
     int_or_zero,
 )

@@ -54,7 +53,7 @@ def response(resp):
         result['content'] = extract_text(eval_xpath(tag, './/span[@class="font11px lightgrey block"]'))
         result['seed'] = int_or_zero(extract_text(eval_xpath(tag, './/td[contains(@class, "green")]')))
         result['leech'] = int_or_zero(extract_text(eval_xpath(tag, './/td[contains(@class, "red")]')))
-        result['filesize'] = get_torrent_size(*extract_text(eval_xpath(tag, './/td[contains(@class, "nobr")]')).split())
+        result['filesize'] = extract_text(eval_xpath(tag, './/td[contains(@class, "nobr")]'))

         results.append(result)

@@ -9,7 +9,6 @@ from lxml import html
 from searx.utils import (
     eval_xpath_getindex,
     extract_text,
-    get_torrent_size,
     int_or_zero,
 )

@@ -99,11 +98,7 @@ def response(resp):

         # let's try to calculate the torrent size

-        filesize = None
-        filesize_info = eval_xpath_getindex(result, xpath_filesize, 0, '')
-        if filesize_info:
-            filesize_info = result.xpath(xpath_filesize)[0]
-            filesize = get_torrent_size(*filesize_info.split())
+        filesize = eval_xpath_getindex(result, xpath_filesize, 0, '')

         # content string contains all information not included into template
         content = 'Category: "{category}". Downloaded {downloads} times.'
@@ -8,7 +8,7 @@ from datetime import datetime
 from operator import itemgetter

 from urllib.parse import quote
-from searx.utils import get_torrent_size
+from searx.utils import humanize_bytes

 # about
 about = {
@@ -80,17 +80,12 @@ def response(resp):

         # extract and convert creation date
         try:
-            date = datetime.fromtimestamp(float(result["added"]))
-            params['publishedDate'] = date
+            params['publishedDate'] = datetime.fromtimestamp(float(result["added"]))
         except:  # pylint: disable=bare-except
             pass

         # let's try to calculate the torrent size
-        try:
-            filesize = get_torrent_size(result["size"], "B")
-            params['filesize'] = filesize
-        except:  # pylint: disable=bare-except
-            pass
+        params['filesize'] = humanize_bytes(int(result["size"]))

         # append result
         results.append(params)
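Note (not part of the diff): this engine's API returns the size as a raw byte count, so it is the one caller in this set that still formats anything; it now does so once with searx.utils.humanize_bytes(). A rough, illustration-only re-implementation of the idea, mirroring the 1024-based unit ladder visible in the utils.py hunk at the bottom of this diff (the real helper's exact rounding and spacing may differ):

    def humanize_bytes_sketch(size, precision=2):
        # Illustrative only -- not the searx.utils implementation.
        units = ['B', 'KB', 'MB', 'GB', 'TB']
        size = float(size)
        for unit in units:
            if size < 1024 or unit == units[-1]:
                return f"{size:.{precision}f} {unit}"
            size /= 1024

    # humanize_bytes_sketch(1321302474) -> "1.23 GB"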
@@ -14,7 +14,6 @@ from searx.utils import (
     eval_xpath,
     eval_xpath_getindex,
     eval_xpath_list,
-    get_torrent_size,
 )

 about = {
@@ -63,7 +62,7 @@ def response(resp):
                 'leech': extract_text(stats[2]),
                 'title': extract_text(title),
                 'url': resp.search_params['base_url'] + url,
-                'filesize': get_torrent_size(*extract_text(stats[1]).split()),
+                'filesize': extract_text(stats[1]),
                 'magnetlink': magnet,
                 'torrentfile': torrentfile,
                 'metadata': extract_text(categ),
@@ -8,7 +8,7 @@ from datetime import datetime
 from urllib.parse import urlencode

 from lxml import html
-from searx.utils import extract_text, get_torrent_size, int_or_zero
+from searx.utils import extract_text, int_or_zero

 # about
 about = {
@@ -49,7 +49,7 @@ def response(resp):
         return []

     # regular expression for parsing torrent size strings
-    size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
+    size_re = re.compile(r'[\d.]+(T|G|M)?B', re.IGNORECASE)

     # processing the results, two rows at a time
     for i in range(0, len(rows), 2):
@@ -73,9 +73,7 @@ def response(resp):
             item = item.strip()
             if item.startswith('Size:'):
                 try:
-                    # ('1.228', 'GB')
-                    groups = size_re.match(item).groups()
-                    params['filesize'] = get_torrent_size(groups[0], groups[1])
+                    params['filesize'] = size_re.search(item).group()
                 except:  # pylint: disable=bare-except
                     pass
             elif item.startswith('Date:'):
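Aside (not part of the diff): the old pattern captured number and unit separately so they could be fed to get_torrent_size(); the new pattern grabs the whole human-readable token and stores it directly. Neither regex allows a space between number and unit, so the check below uses an assumed item string of that shape (the real page text may differ):

    import re

    item = "Size: 354.86MB"   # hypothetical value

    old_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
    print(old_re.match(item).groups())    # ('354.86', 'MB') -- was passed to get_torrent_size()

    new_re = re.compile(r'[\d.]+(T|G|M)?B', re.IGNORECASE)
    print(new_re.search(item).group())    # '354.86MB' -- stored in params['filesize'] as-is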
@@ -56,6 +56,7 @@ from urllib.parse import quote
 from lxml import etree  # type: ignore

 from searx.exceptions import SearxEngineAPIException
+from searx.utils import humanize_bytes

 if TYPE_CHECKING:
     import httpx
@@ -137,11 +138,9 @@ def build_result(item: etree.Element) -> Dict[str, Any]:
     if enclosure is not None:
         enclosure_url = enclosure.get('url')

-    size = get_attribute(item, 'size')
-    if not size and enclosure:
-        size = enclosure.get('length')
-    if size:
-        size = int(size)
+    filesize = get_attribute(item, 'size')
+    if not filesize and enclosure:
+        filesize = enclosure.get('length')

     guid = get_attribute(item, 'guid')
     comments = get_attribute(item, 'comments')
@@ -154,7 +153,7 @@ def build_result(item: etree.Element) -> Dict[str, Any]:
     result: Dict[str, Any] = {
         'template': 'torrent.html',
         'title': get_attribute(item, 'title'),
-        'filesize': size,
+        'filesize': humanize_bytes(int(filesize)) if filesize else None,
         'files': get_attribute(item, 'files'),
         'seed': seeders,
         'leech': _map_leechers(leechers, seeders, peers),
@@ -35,14 +35,7 @@

 {%- if result.filename %}<tr><td>{{ _('Filename') }}</td><td>{{ result.filename|safe }}</td></tr>{% endif -%}

-{%- if result.size %}<tr><td>{{ _('Filesize') }}</td><td>
-  {%- if result.size < 1024 %}{{ result.size }} {{ _('Bytes') -}}
-  {%- elif result.size < 1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024) }} {{ _('kiB') -}}
-  {%- elif result.size < 1024*1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024/1024) }} {{ _('MiB') -}}
-  {%- elif result.size < 1024*1024*1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024/1024/1024) }} {{ _('GiB') -}}
-  {%- else %}{{ '{0:0.2f}'.format(result.size/1024/1024/1024/1024) }} {{ _('TiB') }}{% endif -%}
-</td></tr>
-{%- endif -%}
+{%- if result.size %}<tr><td>{{ _('Filesize') }}</td><td>{{ result.size|safe }}</td></tr>{%- endif -%}

 {%- if result.time %}<tr><td>{{ _('Date') }}</td><td>{{ result.time|safe }}</td></tr>{% endif -%}

@@ -8,14 +8,7 @@

 {% if result.seed is defined %}<p class="stat"> • {{ icon_big('arrow-swap') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> • {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span></p>{% endif %}

-{%- if result.filesize %}<p class="stat">{{ icon_big('floppy-disk') }} {{ _('Filesize') }}<span class="badge">
-  {%- if result.filesize < 1024 %}{{ result.filesize }} {{ _('Bytes') }}
-  {%- elif result.filesize < 1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024) }} {{ _('kiB') }}
-  {%- elif result.filesize < 1024*1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024/1024) }} {{ _('MiB') }}
-  {%- elif result.filesize < 1024*1024*1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024/1024/1024) }} {{ _('GiB') }}
-  {%- else %}{{ '{0:0.2f}'.format(result.filesize/1024/1024/1024/1024) }} {{ _('TiB') }}{% endif -%}
-</span></p>
-{%- endif -%}
+{%- if result.filesize %}<p class="stat">{{ icon_big('floppy-disk') }} {{ _('Filesize') }}<span class="badge">{{ result.filesize }}</span></p>{%- endif -%}

 {%- if result.files %}<p class="stat">{{ icon_big('file') }} {{ _('Number of Files') }} <span class="badge">{{ result.files }}</span></p>{% endif -%}

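Note (not part of the diff): with the byte-count branching removed from both templates, result.size (files.html) and result.filesize (torrent.html) are rendered verbatim, so every engine is expected to supply a ready-to-display string. A minimal sketch of an engine result under the new contract; all field values are made up:

    results = []

    # 'filesize' is already a display string (scraped from the site or produced by
    # humanize_bytes), so torrent.html prints it directly inside the badge.
    results.append(
        {
            'template': 'torrent.html',
            'title': 'Example torrent',
            'url': 'https://example.org/torrent/1',
            'seed': 12,
            'leech': 3,
            'filesize': '1.23 GB',   # was an int number of bytes before this commit
        }
    )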
@@ -332,29 +332,6 @@ def dict_subset(dictionary: MutableMapping, properties: Set[str]) -> Dict:
     return {k: dictionary[k] for k in properties if k in dictionary}


-def get_torrent_size(filesize: str, filesize_multiplier: str) -> Optional[int]:
-    """
-
-    Args:
-        * filesize (str): size
-        * filesize_multiplier (str): TB, GB, .... TiB, GiB...
-
-    Returns:
-        * int: number of bytes
-
-    Example:
-        >>> get_torrent_size('5', 'GB')
-        5368709120
-        >>> get_torrent_size('3.14', 'MiB')
-        3140000
-    """
-    try:
-        multiplier = _STORAGE_UNIT_VALUE.get(filesize_multiplier, 1)
-        return int(float(filesize) * multiplier)
-    except ValueError:
-        return None
-
-
 def humanize_bytes(size, precision=2):
     """Determine the *human readable* value of bytes on 1024 base (1KB=1024B)."""
     s = ['B ', 'KB', 'MB', 'GB', 'TB']