From 7c89e923f63061279d707d7679df3a4e46d645f8 Mon Sep 17 00:00:00 2001 From: FestplattenSchnitzel Date: Sat, 6 Aug 2022 10:21:12 +0200 Subject: [PATCH] Move methods specific to import to its module --- fdroidserver/common.py | 124 ---------------------------- fdroidserver/import_subcommand.py | 131 +++++++++++++++++++++++++++++- tests/common.TestCase | 13 --- tests/import_subcommand.TestCase | 19 ++++- 4 files changed, 145 insertions(+), 142 deletions(-) diff --git a/fdroidserver/common.py b/fdroidserver/common.py index b4217f94..1eaa2e2f 100644 --- a/fdroidserver/common.py +++ b/fdroidserver/common.py @@ -45,8 +45,6 @@ import logging import hashlib import socket import base64 -import urllib.parse -import urllib.request import yaml import zipfile import tempfile @@ -1944,121 +1942,6 @@ def get_gradle_subdir(build_dir, paths): return -def getrepofrompage(url): - """Get the repo type and address from the given web page. - - The page is scanned in a rather naive manner for 'git clone xxxx', - 'hg clone xxxx', etc, and when one of these is found it's assumed - that's the information we want. 
Returns repotype, address, or - None, reason - - """ - if not url.startswith('http'): - return (None, _('{url} does not start with "http"!'.format(url=url))) - req = urllib.request.urlopen(url) # nosec B310 non-http URLs are filtered out - if req.getcode() != 200: - return (None, 'Unable to get ' + url + ' - return code ' + str(req.getcode())) - page = req.read().decode(req.headers.get_content_charset()) - - # Works for BitBucket - m = re.search('data-fetch-url="(.*)"', page) - if m is not None: - repo = m.group(1) - - if repo.endswith('.git'): - return ('git', repo) - - return ('hg', repo) - - # Works for BitBucket (obsolete) - index = page.find('hg clone') - if index != -1: - repotype = 'hg' - repo = page[index + 9:] - index = repo.find('<') - if index == -1: - return (None, _("Error while getting repo address")) - repo = repo[:index] - repo = repo.split('"')[0] - return (repotype, repo) - - # Works for BitBucket (obsolete) - index = page.find('git clone') - if index != -1: - repotype = 'git' - repo = page[index + 10:] - index = repo.find('<') - if index == -1: - return (None, _("Error while getting repo address")) - repo = repo[:index] - repo = repo.split('"')[0] - return (repotype, repo) - - return (None, _("No information found.") + page) - - -def get_app_from_url(url): - """Guess basic app metadata from the URL. - - The URL must include a network hostname, unless it is an lp:, - file:, or git/ssh URL. This throws ValueError on bad URLs to - match urlparse(). 
- - """ - parsed = urllib.parse.urlparse(url) - invalid_url = False - if not parsed.scheme or not parsed.path: - invalid_url = True - - app = fdroidserver.metadata.App() - app.Repo = url - if url.startswith('git://') or url.startswith('git@'): - app.RepoType = 'git' - elif parsed.netloc == 'github.com': - app.RepoType = 'git' - app.SourceCode = url - app.IssueTracker = url + '/issues' - elif parsed.netloc == 'gitlab.com' or parsed.netloc == 'framagit.org': - # git can be fussy with gitlab URLs unless they end in .git - if url.endswith('.git'): - url = url[:-4] - app.Repo = url + '.git' - app.RepoType = 'git' - app.SourceCode = url - app.IssueTracker = url + '/issues' - elif parsed.netloc == 'notabug.org': - if url.endswith('.git'): - url = url[:-4] - app.Repo = url + '.git' - app.RepoType = 'git' - app.SourceCode = url - app.IssueTracker = url + '/issues' - elif parsed.netloc == 'bitbucket.org': - if url.endswith('/'): - url = url[:-1] - app.SourceCode = url + '/src' - app.IssueTracker = url + '/issues' - # Figure out the repo type and adddress... - app.RepoType, app.Repo = getrepofrompage(url) - elif parsed.netloc == 'codeberg.org': - app.RepoType = 'git' - app.SourceCode = url - app.IssueTracker = url + '/issues' - elif url.startswith('https://') and url.endswith('.git'): - app.RepoType = 'git' - - if not parsed.netloc and parsed.scheme in ('git', 'http', 'https', 'ssh'): - invalid_url = True - - if invalid_url: - raise ValueError(_('"{url}" is not a valid URL!'.format(url=url))) - - if not app.RepoType: - raise FDroidException("Unable to determine vcs type. 
" + app.Repo) - - return app - - def parse_srclib_spec(spec): if type(spec) != str: @@ -4609,10 +4492,3 @@ NDKS = [ "url": "https://dl.google.com/android/repository/android-ndk-r25b-linux.zip" } ] - - -def handle_retree_error_on_windows(function, path, excinfo): - """Python can't remove a readonly file on Windows so chmod first.""" - if function in (os.unlink, os.rmdir, os.remove) and excinfo[0] == PermissionError: - os.chmod(path, stat.S_IWRITE) - function(path) diff --git a/fdroidserver/import_subcommand.py b/fdroidserver/import_subcommand.py index 63aefe15..e32a9f90 100644 --- a/fdroidserver/import_subcommand.py +++ b/fdroidserver/import_subcommand.py @@ -18,6 +18,11 @@ # along with this program. If not, see . import configparser +import os +import re +import stat +import urllib + import git import json import shutil @@ -42,6 +47,13 @@ config = None options = None +def handle_retree_error_on_windows(function, path, excinfo): + """Python can't remove a readonly file on Windows so chmod first.""" + if function in (os.unlink, os.rmdir, os.remove) and excinfo[0] == PermissionError: + os.chmod(path, stat.S_IWRITE) + function(path) + + def clone_to_tmp_dir(app): tmp_dir = Path('tmp') tmp_dir.mkdir(exist_ok=True) @@ -49,13 +61,128 @@ def clone_to_tmp_dir(app): tmp_dir = tmp_dir / 'importer' if tmp_dir.exists(): - shutil.rmtree(str(tmp_dir), onerror=common.handle_retree_error_on_windows) + shutil.rmtree(str(tmp_dir), onerror=handle_retree_error_on_windows) vcs = common.getvcs(app.RepoType, app.Repo, tmp_dir) vcs.gotorevision(options.rev) return tmp_dir +def getrepofrompage(url): + """Get the repo type and address from the given web page. + + The page is scanned in a rather naive manner for 'git clone xxxx', + 'hg clone xxxx', etc, and when one of these is found it's assumed + that's the information we want. 
Returns repotype, address, or + None, reason + + """ + if not url.startswith('http'): + return (None, _('{url} does not start with "http"!'.format(url=url))) + req = urllib.request.urlopen(url) # nosec B310 non-http URLs are filtered out + if req.getcode() != 200: + return (None, 'Unable to get ' + url + ' - return code ' + str(req.getcode())) + page = req.read().decode(req.headers.get_content_charset()) + + # Works for BitBucket + m = re.search('data-fetch-url="(.*)"', page) + if m is not None: + repo = m.group(1) + + if repo.endswith('.git'): + return ('git', repo) + + return ('hg', repo) + + # Works for BitBucket (obsolete) + index = page.find('hg clone') + if index != -1: + repotype = 'hg' + repo = page[index + 9:] + index = repo.find('<') + if index == -1: + return (None, _("Error while getting repo address")) + repo = repo[:index] + repo = repo.split('"')[0] + return (repotype, repo) + + # Works for BitBucket (obsolete) + index = page.find('git clone') + if index != -1: + repotype = 'git' + repo = page[index + 10:] + index = repo.find('<') + if index == -1: + return (None, _("Error while getting repo address")) + repo = repo[:index] + repo = repo.split('"')[0] + return (repotype, repo) + + return (None, _("No information found.") + page) + + +def get_app_from_url(url): + """Guess basic app metadata from the URL. + + The URL must include a network hostname, unless it is an lp:, + file:, or git/ssh URL. This throws ValueError on bad URLs to + match urlparse(). 
+ + """ + parsed = urllib.parse.urlparse(url) + invalid_url = False + if not parsed.scheme or not parsed.path: + invalid_url = True + + app = metadata.App() + app.Repo = url + if url.startswith('git://') or url.startswith('git@'): + app.RepoType = 'git' + elif parsed.netloc == 'github.com': + app.RepoType = 'git' + app.SourceCode = url + app.IssueTracker = url + '/issues' + elif parsed.netloc == 'gitlab.com' or parsed.netloc == 'framagit.org': + # git can be fussy with gitlab URLs unless they end in .git + if url.endswith('.git'): + url = url[:-4] + app.Repo = url + '.git' + app.RepoType = 'git' + app.SourceCode = url + app.IssueTracker = url + '/issues' + elif parsed.netloc == 'notabug.org': + if url.endswith('.git'): + url = url[:-4] + app.Repo = url + '.git' + app.RepoType = 'git' + app.SourceCode = url + app.IssueTracker = url + '/issues' + elif parsed.netloc == 'bitbucket.org': + if url.endswith('/'): + url = url[:-1] + app.SourceCode = url + '/src' + app.IssueTracker = url + '/issues' + # Figure out the repo type and adddress... + app.RepoType, app.Repo = getrepofrompage(url) + elif parsed.netloc == 'codeberg.org': + app.RepoType = 'git' + app.SourceCode = url + app.IssueTracker = url + '/issues' + elif url.startswith('https://') and url.endswith('.git'): + app.RepoType = 'git' + + if not parsed.netloc and parsed.scheme in ('git', 'http', 'https', 'ssh'): + invalid_url = True + + if invalid_url: + raise ValueError(_('"{url}" is not a valid URL!'.format(url=url))) + + if not app.RepoType: + raise FDroidException("Unable to determine vcs type. 
" + app.Repo) + + return app + + def check_for_kivy_buildozer(tmp_importer_dir, app, build): versionCode = None buildozer_spec = tmp_importer_dir / 'buildozer.spec' @@ -148,7 +275,7 @@ def main(): break write_local_file = True elif options.url: - app = common.get_app_from_url(options.url) + app = get_app_from_url(options.url) tmp_importer_dir = clone_to_tmp_dir(app) # TODO: Python3.6: Should accept path-like git_repo = git.Repo(str(tmp_importer_dir)) diff --git a/tests/common.TestCase b/tests/common.TestCase index e80ecd35..549167fc 100755 --- a/tests/common.TestCase +++ b/tests/common.TestCase @@ -1419,19 +1419,6 @@ class CommonTest(unittest.TestCase): with self.assertRaises(MetaDataException): self.assertEqual(fdroidserver.common.parse_srclib_spec('@multi@at-signs@')) - def test_bad_urls(self): - for url in ( - 'asdf', - 'file://thing.git', - 'https:///github.com/my/project', - 'git:///so/many/slashes', - 'ssh:/notabug.org/missing/a/slash', - 'git:notabug.org/missing/some/slashes', - 'https//github.com/bar/baz', - ): - with self.assertRaises(ValueError): - fdroidserver.common.get_app_from_url(url) - def test_remove_signing_keys(self): testdir = tempfile.mkdtemp( prefix=inspect.currentframe().f_code.co_name, dir=self.tmpdir diff --git a/tests/import_subcommand.TestCase b/tests/import_subcommand.TestCase index f4400e40..8c2d7792 100755 --- a/tests/import_subcommand.TestCase +++ b/tests/import_subcommand.TestCase @@ -51,7 +51,7 @@ class ImportTest(unittest.TestCase): print('Skipping ImportTest!') return - app = fdroidserver.common.get_app_from_url(url) + app = fdroidserver.import_subcommand.get_app_from_url(url) fdroidserver.import_subcommand.clone_to_tmp_dir(app) self.assertEqual(app.RepoType, 'git') self.assertEqual(app.Repo, 'https://gitlab.com/fdroid/ci-test-app.git') @@ -88,13 +88,13 @@ class ImportTest(unittest.TestCase): # TODO: Python3.6: Accepts a path-like object. 
shutil.rmtree( str(tmp_importer), - onerror=fdroidserver.common.handle_retree_error_on_windows, + onerror=fdroidserver.import_subcommand.handle_retree_error_on_windows, ) shutil.copytree( str(self.basedir / 'source-files' / appid), str(tmp_importer) ) - app = fdroidserver.common.get_app_from_url(url) + app = fdroidserver.import_subcommand.get_app_from_url(url) with mock.patch( 'fdroidserver.common.getvcs', lambda a, b, c: fdroidserver.common.vcs(url, testdir), @@ -119,6 +119,19 @@ class ImportTest(unittest.TestCase): self.assertEqual(vc, versionCode) self.assertEqual(appid, package) + def test_bad_urls(self): + for url in ( + 'asdf', + 'file://thing.git', + 'https:///github.com/my/project', + 'git:///so/many/slashes', + 'ssh:/notabug.org/missing/a/slash', + 'git:notabug.org/missing/some/slashes', + 'https//github.com/bar/baz', + ): + with self.assertRaises(ValueError): + fdroidserver.import_subcommand.get_app_from_url(url) + if __name__ == "__main__": parser = optparse.OptionParser()