mirror of
https://gitlab.com/fdroid/fdroidserver.git
synced 2024-11-10 17:30:11 +01:00
7904f12d05
The existing logic from d1ddd525c
in !1225 is confusing because it adds its
own retry loop on top of the retry mechanism that is built into requests.
So this test confirms that setting `download_file(retries=3)` actually
results in more than three retries.
109 lines
3.9 KiB
Python
109 lines
3.9 KiB
Python
#!/usr/bin/env python3
|
|
#
|
|
# net.py - part of the FDroid server tools
|
|
# Copyright (C) 2015 Hans-Christoph Steiner <hans@eds.org>
|
|
# Copyright (C) 2022 FC Stegerman <flx@obfusk.net>
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
import logging
|
|
import os
|
|
import requests
|
|
import time
|
|
import urllib
|
|
from requests.adapters import HTTPAdapter, Retry
|
|
from requests.exceptions import ChunkedEncodingError
|
|
|
|
# HTTP request headers passed as ``headers=`` to the requests calls made by
# download_file() and http_get(); identifies the client as F-Droid.
HEADERS = {'User-Agent': 'F-Droid'}
|
|
|
|
|
|
def download_file(url, local_filename=None, dldir='tmp', retries=3, backoff_factor=0.1):
    """Try hard to download the file, including retrying on failures.

    This has two retry cycles, one inside of the requests session, the
    other provided by this function.  The requests retry logic applies
    to failed DNS lookups, socket connections and connection timeouts,
    never to requests where data has made it to the server.  This
    handles ChunkedEncodingError during transfer in its own retry
    loop.  This can result in more retries than are specified in the
    retries parameter.

    Parameters
    ----------
    url
        The URL to download from.
    local_filename
        Path of the file to write.  If None, the last path segment of
        the URL is used as the file name inside dldir.
    dldir
        Directory used to build the target path when local_filename is
        None.
    retries
        Number of additional attempts made by this function's own loop.
        It is also the starting budget handed to the session-level
        retry logic, which shrinks by one on every attempt.  With 0, no
        session is created and a plain requests.get() is used.
    backoff_factor
        Passed to the session-level retry logic and used as the base of
        the exponential sleep between attempts of this function's loop.

    Returns
    -------
    The path of the file that was written.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with an error status.
    requests.exceptions.ChunkedEncodingError
        If the transfer was still being interrupted on the last attempt.
    ValueError
        If retries is negative.
    """
    filename = urllib.parse.urlparse(url).path.split('/')[-1]
    if local_filename is None:
        local_filename = os.path.join(dldir, filename)
    for i in range(retries + 1):
        session = None
        if retries:
            max_retries = Retry(total=retries - i, backoff_factor=backoff_factor)
            adapter = HTTPAdapter(max_retries=max_retries)
            session = requests.Session()
            session.mount('http://', adapter)
            session.mount('https://', adapter)
        # with retries=0 there is nothing to configure, so use the module API
        client = requests if session is None else session
        try:
            # the stream=True parameter keeps memory usage low
            r = client.get(
                url, stream=True, allow_redirects=True, headers=HEADERS, timeout=300
            )
            try:
                r.raise_for_status()
                with open(local_filename, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk:  # filter out keep-alive new chunks
                            f.write(chunk)
                return local_filename
            except ChunkedEncodingError:
                if i == retries:
                    raise
                logging.warning('Download interrupted, retrying...')
            finally:
                # a streamed response holds its connection until closed
                r.close()
        finally:
            # each attempt builds its own session; release its pool
            if session is not None:
                session.close()
        # only reached after an interrupted transfer with attempts left
        time.sleep(backoff_factor * 2**i)
    # the loop body never ran, which only happens for a negative retries
    raise ValueError("retries must be >= 0")
|
|
|
|
|
|
def http_get(url, etag=None, timeout=600):
    """Fetch the content behind a URL using a GET request.

    When an ETag is supplied, a HEAD request is sent first so that the
    body is only downloaded if the server reports a different ETag.

    Parameters
    ----------
    url
        The URL to download from.
    etag
        The last ETag to be used for the request (optional).
    timeout
        Timeout handed to both the HEAD and the GET request.

    Returns
    -------
    A tuple consisting of:
        - The raw content that was downloaded or None if it did not change
        - The new eTag as returned by the HTTP request
    """
    # TODO disable TLS Session IDs and TLS Session Tickets
    #      (plain text cookie visible to anyone who can see the network traffic)
    if etag:
        head_response = requests.head(url, headers=HEADERS, timeout=timeout)
        head_response.raise_for_status()
        # an unchanged ETag means the cached copy is still current
        if head_response.headers.get('ETag') == etag:
            return None, etag

    response = requests.get(url, headers=HEADERS, timeout=timeout)
    response.raise_for_status()

    # .get() yields None when the server did not send an ETag header
    return response.content, response.headers.get('ETag')
|