From 9a4f3ac019d3dc3d9154db6b434f717623c4c0e2 Mon Sep 17 00:00:00 2001 From: Marcus Hoffmann Date: Mon, 15 Jan 2018 01:08:23 +0100 Subject: [PATCH 1/5] Revert "build: bump max_apps_per_run to 50" This reverts commit 56a53055bef857d7cf0f76eb7b485a2f6e85ac2a. Revert "build: limit --all to 10 apps at a time" This reverts commit afc5cc6b6a969bb1f12eb325df436eaadac53099. --- fdroidserver/build.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fdroidserver/build.py b/fdroidserver/build.py index a431dd52..61cda139 100644 --- a/fdroidserver/build.py +++ b/fdroidserver/build.py @@ -1138,11 +1138,7 @@ def main(): # Build applications... failed_apps = {} build_succeeded = [] - max_apps_per_run = 50 for appid, app in apps.items(): - max_apps_per_run -= 1 - if max_apps_per_run < 1: - break first = True From 85985074d45832c8e0fb9295eda1f2614b42f7e3 Mon Sep 17 00:00:00 2001 From: Marcus Hoffmann Date: Mon, 15 Jan 2018 01:03:47 +0100 Subject: [PATCH 2/5] build: enable watchdog timer for each build that kills in 2 hours This introduces locking for the commonly used vagrant functions in vmtools because vagrant fails when another vagrant command is already running. --- fdroidserver/build.py | 17 +++++++++++++++++ fdroidserver/vmtools.py | 32 ++++++++++++++++++++------------ 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/fdroidserver/build.py b/fdroidserver/build.py index 61cda139..cb030ebb 100644 --- a/fdroidserver/build.py +++ b/fdroidserver/build.py @@ -25,6 +25,7 @@ import re import resource import sys import tarfile +import threading import traceback import time import requests @@ -978,6 +979,13 @@ def trybuild(app, build, build_dir, output_dir, log_dir, also_check_dir, return True +def force_halt_build(): + """Halt the currently running Vagrant VM, to be called from a Timer""" + logging.error(_('Force halting build after timeout!')) + vm = vmtools.get_build_vm('builder') + vm.halt() + + def parse_commandline(): """Parse the command line. 
Returns options, parser.""" @@ -1143,6 +1151,12 @@ def main(): first = True for build in app.builds: + if options.server: # enable watchdog timer + timer = threading.Timer(7200, force_halt_build) + timer.start() + else: + timer = None + wikilog = None build_starttime = common.get_wiki_timestamp() tools_version_log = '' @@ -1283,6 +1297,9 @@ def main(): except Exception as e: logging.error("Error while attempting to publish build log: %s" % e) + if timer: + timer.cancel() # kill the watchdog timer + for app in build_succeeded: logging.info("success: %s" % (app.id)) diff --git a/fdroidserver/vmtools.py b/fdroidserver/vmtools.py index 6671a3eb..33544ac5 100644 --- a/fdroidserver/vmtools.py +++ b/fdroidserver/vmtools.py @@ -29,6 +29,9 @@ from .common import FDroidException from logging import getLogger from fdroidserver import _ +import threading + +lock = threading.Lock() logger = getLogger('fdroidserver-vmtools') @@ -175,7 +178,6 @@ class FDroidBuildVm(): This is intended to be a hypervisor independant, fault tolerant wrapper around the vagrant functions we use. """ - def __init__(self, srvdir): """Create new server class. 
""" @@ -191,21 +193,27 @@ class FDroidBuildVm(): self.vgrnt = vagrant.Vagrant(root=srvdir, out_cm=vagrant.stdout_cm, err_cm=vagrant.stdout_cm) def up(self, provision=True): - try: - self.vgrnt.up(provision=provision) - self.srvuuid = self._vagrant_fetch_uuid() - except subprocess.CalledProcessError as e: - raise FDroidBuildVmException("could not bring up vm '%s'" % self.srvname) from e + global lock + with lock: + try: + self.vgrnt.up(provision=provision) + self.srvuuid = self._vagrant_fetch_uuid() + except subprocess.CalledProcessError as e: + raise FDroidBuildVmException("could not bring up vm '%s'" % self.srvname) from e def suspend(self): - logger.info('suspending buildserver') - try: - self.vgrnt.suspend() - except subprocess.CalledProcessError as e: - raise FDroidBuildVmException("could not suspend vm '%s'" % self.srvname) from e + global lock + with lock: + logger.info('suspending buildserver') + try: + self.vgrnt.suspend() + except subprocess.CalledProcessError as e: + raise FDroidBuildVmException("could not suspend vm '%s'" % self.srvname) from e def halt(self): - self.vgrnt.halt(force=True) + global lock + with lock: + self.vgrnt.halt(force=True) def destroy(self): """Remove every trace of this VM from the system. 
From 80e121d182855db634a9e8010a661dec666940f4 Mon Sep 17 00:00:00 2001 From: Marcus Hoffmann Date: Sat, 20 Jan 2018 22:16:39 +0100 Subject: [PATCH 3/5] build: log timeouts to the wiki --- fdroidserver/build.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fdroidserver/build.py b/fdroidserver/build.py index cb030ebb..9e2a7a3f 100644 --- a/fdroidserver/build.py +++ b/fdroidserver/build.py @@ -240,9 +240,12 @@ def build_server(app, build, vcs, build_dir, output_dir, log_dir, force): logging.info("...getting exit status") returncode = chan.recv_exit_status() if returncode != 0: - raise BuildException( - "Build.py failed on server for {0}:{1}".format( - app.id, build.versionName), None if options.verbose else str(output, 'utf-8')) + if timeout_event.is_set(): + message = "Timeout exceeded! Build VM force-stopped for {0}:{1}" + else: + message = "Build.py failed on server for {0}:{1}" + raise BuildException(message.format(app.id, build.versionName), + None if options.verbose else str(output, 'utf-8')) # Retreive logs... toolsversion_log = common.get_toolsversion_logname(app, build) @@ -982,6 +985,7 @@ def trybuild(app, build, build_dir, output_dir, log_dir, also_check_dir, def force_halt_build(): """Halt the currently running Vagrant VM, to be called from a Timer""" logging.error(_('Force halting build after timeout!')) + timeout_event.set() vm = vmtools.get_build_vm('builder') vm.halt() @@ -1037,6 +1041,7 @@ config = None buildserverid = None fdroidserverid = None start_timestamp = time.gmtime() +timeout_event = threading.Event() def main(): From fa43066f8df414f715f9f88858d638d9d39cb2e1 Mon Sep 17 00:00:00 2001 From: Marcus Hoffmann Date: Fri, 19 Jan 2018 22:35:06 +0100 Subject: [PATCH 4/5] build: add global soft timeout of 12 hours Only start new builds for 12 hours. This ensures we publish new builds often enough even on long backlogs. This could be made configurable at a later point. 
--- fdroidserver/build.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fdroidserver/build.py b/fdroidserver/build.py index 9e2a7a3f..05700cf7 100644 --- a/fdroidserver/build.py +++ b/fdroidserver/build.py @@ -1151,11 +1151,17 @@ def main(): # Build applications... failed_apps = {} build_succeeded = [] + # Only build for 12 hours, then stop gracefully + endtime = time.time() + 12 * 60 * 60 + max_build_time_reached = False for appid, app in apps.items(): first = True for build in app.builds: + if time.time() > endtime: + max_build_time_reached = True + break if options.server: # enable watchdog timer timer = threading.Timer(7200, force_halt_build) timer.start() @@ -1305,6 +1311,10 @@ def main(): if timer: timer.cancel() # kill the watchdog timer + if max_build_time_reached: + logging.info("Stopping after global build timeout...") + break + for app in build_succeeded: logging.info("success: %s" % (app.id)) From a1a88e1c6aa8f01afe83aea484c770ef69f0ef06 Mon Sep 17 00:00:00 2001 From: Marcus Hoffmann Date: Sat, 20 Jan 2018 20:48:02 +0100 Subject: [PATCH 5/5] main: force exit on keyboard interrupt This applies the same workaround as b8ed892ad9ed7e125278b. --- fdroid | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fdroid b/fdroid index 74339df0..a66df6b1 100755 --- a/fdroid +++ b/fdroid @@ -18,6 +18,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>.
import sys +import os import logging import fdroidserver.common @@ -73,7 +74,6 @@ def main(): print_help() sys.exit(0) elif command == '--version': - import os.path output = _('no version info found!') cmddir = os.path.realpath(os.path.dirname(__file__)) moduledir = os.path.realpath(os.path.dirname(fdroidserver.common.__file__) + '/..') @@ -143,7 +143,9 @@ def main(): sys.exit(1) except KeyboardInterrupt: print('') - sys.exit(1) + sys.stdout.flush() + sys.stderr.flush() + os._exit(1) # These should only be unexpected crashes due to bugs in the code # str(e) often doesn't contain a reason, so just show the backtrace except Exception as e: