From a3cecc16a37cc2eec02b5bcf2a382d28885270c9 Mon Sep 17 00:00:00 2001
From: Hans-Christoph Steiner
Date: Tue, 18 Sep 2018 15:29:24 +0200
Subject: [PATCH] use partial androguard binary XML parsing to speed up APK ID lookup

Normally, androguard parses the entire APK before it is possible to get
any values from it. This uses androguard primitives to only attempt to
parse the AndroidManifest.xml, then to quit as soon as it gets what it
needs. This greatly speeds up the parsing (1 minute vs 60 minutes).

fdroid/fdroidserver#557
---
 fdroidserver/common.py | 77 +++++++++++++++++++++++++++++++++++++++----
 tests/common.TestCase  | 14 ++++-----
 2 files changed, 79 insertions(+), 12 deletions(-)

diff --git a/fdroidserver/common.py b/fdroidserver/common.py
index 7b8842a1..53c4d41a 100644
--- a/fdroidserver/common.py
+++ b/fdroidserver/common.py
@@ -2096,26 +2096,93 @@ def is_apk_and_debuggable(apkfile):
 
 
 def get_apk_id(apkfile):
-    """Extract identification information from APK using aapt.
+    """Extract identification information from APK.
+
+    Androguard is preferred since it is more reliable and a lot
+    faster. Occasionally, when androguard can't get the info from the
+    APK, aapt still can. So aapt is also used as the final fallback
+    method.
 
     :param apkfile: path to an APK file.
     :returns: triplet (appid, version code, version name)
+    :raises zipfile.BadZipFile: if androguard cannot read the APK and
+        aapt is not configured as a fallback.
+
     """
     if use_androguard():
-        return get_apk_id_androguard(apkfile)
+        try:
+            return get_apk_id_androguard(apkfile)
+        except zipfile.BadZipFile as e:
+            logging.error(apkfile + ': ' + str(e))
+            if 'aapt' in config:
+                return get_apk_id_aapt(apkfile)
+            # no fallback available: re-raise rather than silently
+            # returning None, which callers cannot unpack as a triplet
+            raise
     else:
         return get_apk_id_aapt(apkfile)
 
 
 def get_apk_id_androguard(apkfile):
+    """Read (appid, versionCode, versionName) from an APK
+
+    This first tries to do quick binary XML parsing to just get the
+    values that are needed. It will fall back to full androguard
+    parsing, which is slow, if it can't find the versionName value or
+    versionName is set to an Android String Resource (e.g. an integer
+    hex value that starts with @).
+
+    :param apkfile: path to an APK file.
+    :returns: triplet (appid, versionCode, versionName)
+    :raises FDroidException: if apkfile does not exist.
+    :raises RuntimeError: if <manifest> is not the first element.
+
+    """
     if not os.path.exists(apkfile):
         raise FDroidException(_("Reading packageName/versionCode/versionName failed, APK invalid: '{apkfilename}'")
                               .format(apkfilename=apkfile))
-    a = _get_androguard_APK(apkfile)
-    versionName = ensure_final_value(a.package, a.get_android_resources(), a.get_androidversion_name())
+
+    from androguard.core.bytecodes.axml import AXMLParser, format_value, START_TAG, END_TAG, TEXT, END_DOCUMENT
+
+    appid = None
+    versionCode = None
+    versionName = None
+    with zipfile.ZipFile(apkfile) as apk:
+        with apk.open('AndroidManifest.xml') as manifest:
+            axml = AXMLParser(manifest.read())
+            while axml.is_valid():
+                _type = next(axml)
+                if _type == START_TAG:
+                    for i in range(0, axml.getAttributeCount()):
+                        name = axml.getAttributeName(i)
+                        _value_type = axml.getAttributeValueType(i)
+                        _data = axml.getAttributeValueData(i)
+                        value = format_value(_value_type, _data, lambda _: axml.getAttributeValue(i))
+                        if appid is None and name == 'package':
+                            appid = value
+                        elif versionCode is None and name == 'versionCode':
+                            # normalize a hex value (0x...) to a decimal string
+                            if value.startswith('0x'):
+                                versionCode = str(int(value, 16))
+                            else:
+                                versionCode = value
+                        elif versionName is None and name == 'versionName':
+                            versionName = value
+
+                    # stop as soon as the <manifest> tag's attributes are read
+                    if axml.getName() == 'manifest':
+                        break
+                elif _type == END_TAG or _type == TEXT or _type == END_DOCUMENT:
+                    raise RuntimeError('{path}: <manifest> must be the first element in AndroidManifest.xml'
+                                       .format(path=apkfile))
+
+    if not versionName or versionName[0] == '@':
+        a = _get_androguard_APK(apkfile)
+        versionName = ensure_final_value(a.package, a.get_android_resources(), a.get_androidversion_name())
     if not versionName:
         versionName = ''  # versionName is expected to always be a str
-    return a.package, a.get_androidversion_code(), versionName
+
+    return appid, versionCode, versionName.strip('\0')
 
 
 def get_apk_id_aapt(apkfile):
diff --git a/tests/common.TestCase b/tests/common.TestCase
index f1449392..841da524 100755
--- a/tests/common.TestCase
+++ b/tests/common.TestCase
@@ -611,14 +611,14 @@ class CommonTest(unittest.TestCase):
         for apkfilename, appid, versionCode, versionName in testcases:
             if 'aapt' in config:
                 a, vc, vn = fdroidserver.common.get_apk_id_aapt(apkfilename)
-                self.assertEqual(appid, a)
-                self.assertEqual(versionCode, vc)
-                self.assertEqual(versionName, vn)
+                self.assertEqual(appid, a, 'aapt appid parsing failed for ' + apkfilename)
+                self.assertEqual(versionCode, vc, 'aapt versionCode parsing failed for ' + apkfilename)
+                self.assertEqual(versionName, vn, 'aapt versionName parsing failed for ' + apkfilename)
             if fdroidserver.common.use_androguard():
-                a, vc, vn = fdroidserver.common.get_apk_id_androguard(apkfilename)
-                self.assertEqual(appid, a)
-                self.assertEqual(versionCode, vc)
-                self.assertEqual(versionName, vn)
+                a, vc, vn = fdroidserver.common.get_apk_id(apkfilename)
+                self.assertEqual(appid, a, 'androguard appid parsing failed for ' + apkfilename)
+                self.assertEqual(versionName, vn, 'androguard versionName parsing failed for ' + apkfilename)
+                self.assertEqual(versionCode, vc, 'androguard versionCode parsing failed for ' + apkfilename)
 
         with self.assertRaises(FDroidException):
             fdroidserver.common.get_apk_id('nope')