Mirror of https://github.com/searxng/searxng.git (synced 2024-11-22 12:10:11 +01:00)
Checker: change image check
In the master branch, the checker starts streaming the response and then cuts the connection. However, this creates a lot of read errors, which are false negatives, and I don't know how to fix that issue. This commit changes the checker to download the whole image instead. The error reporting is also changed to report a single line instead of the whole stacktrace. In addition, if a timeout occurs, the checker now waits for one second before retrying.

Note that I have not tested the checker running in the background; that feature seems forgotten and has attracted little interest despite the initial work a few years ago.
parent 42515d98f7
commit f49d1a9b90
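For context, here is a minimal standalone sketch of the behaviour this commit moves to: download the whole body, check the Content-Type, log one-line errors, and sleep one second before retrying after a timeout. It uses plain httpx instead of searxng's network helpers, and the function name, logger, and retry count are illustrative assumptions rather than the project's actual API.

# Minimal sketch only: plain httpx instead of searxng's network helpers; the
# function name, logger, and retry count are illustrative assumptions.
import logging
import time

import httpx

logger = logging.getLogger('image-check-sketch')


def check_if_image(image_url: str) -> bool:
    """Download the whole response and report whether Content-Type starts with image/."""
    retry = 3
    while retry > 0:
        start = time.monotonic()
        try:
            # download the full body instead of streaming and cutting the connection
            # (follow_redirects is the keyword argument in recent httpx releases)
            r = httpx.get(image_url, timeout=10.0, follow_redirects=True)
            if r.status_code == 200:
                return r.headers.get('content-type', '').startswith('image/')
            return False
        except httpx.TimeoutException:
            # one-line error report, then wait a second before retrying
            logger.error('Timeout for %s after %i seconds', image_url, int(time.monotonic() - start))
            time.sleep(1)
            retry -= 1
        except httpx.HTTPError as e:
            # other transport errors: log a single line and give up
            logger.error('Exception for %s: %s', image_url, e.__class__.__name__)
            return False
    return False

Downloading the full body is a bit more expensive than streaming only the headers, but it avoids the spurious read errors that closing the stream early produced in the checker.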
@@ -1,11 +1,11 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 
 import gc
+import time
 import typing
 import types
 import functools
 import itertools
-from time import time
 from timeit import default_timer
 from urllib.parse import urlparse
 
@@ -65,15 +65,15 @@ def _download_and_check_if_image(image_url: str) -> bool:
     This function should not be called directly: use _is_url_image
     otherwise the cache of functools.lru_cache contains data: URL which might be huge.
     """
-    retry = 2
+    retry = 3
 
     while retry > 0:
-        a = time()
+        a = default_timer()
         try:
             # use "image_proxy" (avoid HTTP/2)
+            network.set_timeout_for_thread(2)
             network.set_context_network_name('image_proxy')
-            r, stream = network.stream(
-                'GET',
+            r = network.get(
                 image_url,
                 timeout=10.0,
                 allow_redirects=True,
@@ -89,19 +89,20 @@ def _download_and_check_if_image(image_url: str) -> bool:
                     'Cache-Control': 'max-age=0',
                 },
             )
-            r.close()
             if r.status_code == 200:
                 is_image = r.headers.get('content-type', '').startswith('image/')
             else:
                 is_image = False
             del r
-            del stream
             return is_image
         except httpx.TimeoutException:
-            logger.error('Timeout for %s: %i', image_url, int(time() - a))
+            logger.error('Timeout for %s: %i', image_url, int(default_timer() - a))
+            time.sleep(1)
             retry -= 1
-        except httpx.HTTPError:
-            logger.exception('Exception for %s', image_url)
+        except httpx.HTTPStatusError as e:
+            logger.error('Exception for %s: HTTP status=%i', image_url, e.response.status_code)
+        except httpx.HTTPError as e:
+            logger.error('Exception for %s: %s, %s', image_url, e.__class__.__name__, ",".join(e.args))
             return False
     return False
 