def download_using_mirrors(mirrors, local_filename=None):
    """Try to download the file from any working mirror.

    Download the file that all URLs in the mirrors list point to,
    trying all the tricks, starting with the most private methods
    first.  The list of mirrors is converted into a list of mirror
    configurations to try, in the order that they should be attempted.

    This builds mirror_configs_to_try using all possible combos to
    try.  If a mirror is marked with worksWithoutSNI: True, then this
    logic will try it twice: first without SNI, then again with SNI.

    Every configuration is attempted once per timeout step (2, 10 and
    then 100 seconds, each with random jitter added), so a slow mirror
    gets more time on later passes.

    Parameters
    ----------
    mirrors
        The mirror definitions, handed to common.parse_mirrors_config().
        Each parsed entry is used as a dict with a 'url' key and an
        optional 'worksWithoutSNI' key.
    local_filename
        Where to write the download.  If not set, the last path segment
        of the first mirror URL that has one is used, placed in
        common.get_cachedir(); if no URL has one, a temporary file
        with the prefix 'fdroid-' is created.

    Returns
    -------
    The path of the downloaded file, or None if there were no mirrors
    to try.

    Raises
    ------
    The exception from the last failed attempt, if all attempts on all
    mirror configurations failed.

    """
    mirrors = common.parse_mirrors_config(mirrors)
    mirror_configs_to_try = []
    for mirror in mirrors:
        mirror_configs_to_try.append(mirror)
        if mirror.get('worksWithoutSNI'):
            # second entry for the same mirror, this time with SNI enabled
            with_sni = copy.deepcopy(mirror)
            del with_sni['worksWithoutSNI']
            mirror_configs_to_try.append(with_sni)

    if not mirror_configs_to_try:
        # nothing to attempt, so do not create any file on disk
        return None

    if not local_filename:
        filename = None
        for mirror in mirrors:
            filename = urllib.parse.urlparse(mirror['url']).path.split('/')[-1]
            if filename:
                break
        if filename:
            local_filename = os.path.join(common.get_cachedir(), filename)
        else:
            # mkstemp() returns (fd, path); only the path is needed here,
            # so close the descriptor right away instead of leaking it
            fd, local_filename = tempfile.mkstemp(prefix='fdroid-')
            os.close(fd)

    timeouts = (2, 10, 100)
    last_exception = None
    for timeout in timeouts:
        for mirror in mirror_configs_to_try:
            # NOTE(review): this sets a module-wide urllib3 attribute and it
            # is not restored afterwards -- confirm that is intended.
            urllib3.util.ssl_.HAS_SNI = not mirror.get('worksWithoutSNI')
            try:
                # the stream=True parameter keeps memory usage low, the
                # with block makes sure the connection is always released
                with requests.get(
                    mirror['url'],
                    stream=True,
                    allow_redirects=False,
                    headers=HEADERS,
                    # add jitter to the timeout to be less predictable
                    timeout=timeout + random.randint(0, timeout),  # nosec B311
                ) as r:
                    if r.status_code != 200:
                        raise requests.exceptions.HTTPError(r.status_code, response=r)
                    with open(local_filename, 'wb') as f:
                        for chunk in r.iter_content(chunk_size=1024):
                            if chunk:  # filter out keep-alive new chunks
                                f.write(chunk)
                                f.flush()
                return local_filename
            except (
                ConnectionError,
                requests.exceptions.ChunkedEncodingError,
                requests.exceptions.ConnectionError,
                requests.exceptions.ContentDecodingError,
                requests.exceptions.HTTPError,
                requests.exceptions.SSLError,
                requests.exceptions.StreamConsumedError,
                requests.exceptions.Timeout,
                requests.exceptions.UnrewindableBodyError,
            ) as e:
                last_exception = e
                logger.debug(_('Retrying failed download: %s') % str(e))
    # if it hasn't succeeded by now, then give up and raise last exception
    if last_exception:
        raise last_exception
def test_download_using_mirrors_retries(self):
    """A working mirror at the end of the list is reached after the failures."""
    server = RetryServer()
    try:
        f = net.download_using_mirrors(
            [
                'https://fake.com/f.txt',  # 404 or 301 Redirect
                'https://httpbin.org/status/403',
                'https://httpbin.org/status/500',
                'http://localhost:1/f.txt',  # ConnectionError
                'http://localhost:%d/' % server.port,
            ],
        )
        # strip the HTTP headers and compare the reply
        self.assertEqual(server.reply.split(b'\n\n')[1], Path(f).read_bytes())
    finally:
        # stop the server even when the download or the assertion fails
        server.stop()


def test_download_using_mirrors_retries_not_forever(self):
    """The retry logic should eventually exit with an error."""
    server = RetryServer(failures=5)
    try:
        with self.assertRaises(requests.exceptions.ConnectionError):
            net.download_using_mirrors(['http://localhost:%d/' % server.port])
    finally:
        # stop the server even when the expected exception is not raised
        server.stop()