1
0
mirror of https://gitlab.com/fdroid/fdroidserver.git synced 2024-11-14 02:50:12 +01:00

import: overhaul URL validation to use urllib.parse

Python provides us with a lovely URL parser that has some level of validation
built in. The parsed URL is then much easier to validate.
This commit is contained in:
Hans-Christoph Steiner 2020-02-03 12:42:12 +01:00
parent e9a6c84efd
commit 1153ac24fd
2 changed files with 49 additions and 55 deletions

View File

@ -22,6 +22,7 @@ import glob
import os import os
import re import re
import shutil import shutil
import urllib.parse
import urllib.request import urllib.request
from argparse import ArgumentParser from argparse import ArgumentParser
import logging import logging
@ -90,81 +91,64 @@ options = None
def get_app_from_url(url): def get_app_from_url(url):
"""Guess basic app metadata from the URL.
The URL must include a network hostname, unless it is an lp:,
file:, or git/ssh URL. This throws ValueError on bad URLs to
match urlparse().
"""
parsed = urllib.parse.urlparse(url)
invalid_url = False
if not parsed.scheme or not parsed.path:
invalid_url = True
app = metadata.App() app = metadata.App()
app.Repo = url
# Figure out what kind of project it is... if url.startswith('git://') or url.startswith('git@'):
projecttype = None app.RepoType = 'git'
app.WebSite = url # by default, we might override it elif parsed.netloc == 'github.com':
if url.startswith('git://'): app.RepoType = 'git'
projecttype = 'git'
repo = url
repotype = 'git'
app.SourceCode = ""
app.WebSite = ""
elif url.startswith('https://github.com'):
projecttype = 'github'
repo = url
repotype = 'git'
app.SourceCode = url app.SourceCode = url
app.IssueTracker = url + '/issues' app.IssueTracker = url + '/issues'
app.WebSite = "" elif parsed.netloc == 'gitlab.com':
elif url.startswith('https://gitlab.com/'):
projecttype = 'gitlab'
# git can be fussy with gitlab URLs unless they end in .git # git can be fussy with gitlab URLs unless they end in .git
if url.endswith('.git'): if url.endswith('.git'):
url = url[:-4] url = url[:-4]
repo = url + '.git' app.Repo = url + '.git'
repotype = 'git' app.RepoType = 'git'
app.WebSite = url
app.SourceCode = url + '/tree/HEAD'
app.IssueTracker = url + '/issues'
elif url.startswith('https://notabug.org/'):
projecttype = 'notabug'
if url.endswith('.git'):
url = url[:-4]
repo = url + '.git'
repotype = 'git'
app.SourceCode = url app.SourceCode = url
app.IssueTracker = url + '/issues' app.IssueTracker = url + '/issues'
app.WebSite = "" elif parsed.netloc == 'notabug.org':
elif url.startswith('https://bitbucket.org/'): if url.endswith('.git'):
url = url[:-4]
app.Repo = url + '.git'
app.RepoType = 'git'
app.SourceCode = url
app.IssueTracker = url + '/issues'
elif parsed.netloc == 'bitbucket.org':
if url.endswith('/'): if url.endswith('/'):
url = url[:-1] url = url[:-1]
projecttype = 'bitbucket'
app.SourceCode = url + '/src' app.SourceCode = url + '/src'
app.IssueTracker = url + '/issues' app.IssueTracker = url + '/issues'
# Figure out the repo type and adddress... # Figure out the repo type and adddress...
repotype, repo = getrepofrompage(url) app.RepoType, app.Repo = getrepofrompage(url)
if not repotype:
raise FDroidException("Unable to determine vcs type. " + repo)
elif url.startswith('https://') and url.endswith('.git'): elif url.startswith('https://') and url.endswith('.git'):
projecttype = 'git' app.RepoType = 'git'
repo = url
repotype = 'git'
app.SourceCode = ""
app.WebSite = ""
if not projecttype:
raise FDroidException("Unable to determine the project type. "
+ "The URL you supplied was not in one of the supported formats. "
+ "Please consult the manual for a list of supported formats, "
+ "and supply one of those.")
# Ensure we have a sensible-looking repo address at this point. If not, we if not parsed.netloc and parsed.scheme in ('git', 'http', 'https', 'ssh'):
# might have got a page format we weren't expecting. (Note that we invalid_url = True
# specifically don't want git@...)
if ((repotype != 'bzr' and (not repo.startswith('http://')
and not repo.startswith('https://')
and not repo.startswith('git://')))
or ' ' in repo):
raise FDroidException("Repo address '{0}' does not seem to be valid".format(repo))
if invalid_url:
raise ValueError(_('"{url}" is not a valid URL!'.format(url=url)))
app.RepoType = repotype if not app.RepoType:
app.Repo = repo raise FDroidException("Unable to determine vcs type. " + app.Repo)
return app return app
def clone_to_tmp_dir(app): def clone_to_tmp_dir(app):
tmp_dir = 'tmp' tmp_dir = 'tmp'
if not os.path.isdir(tmp_dir): if not os.path.isdir(tmp_dir):

View File

@ -52,7 +52,6 @@ class ImportTest(unittest.TestCase):
app = import_proxy.get_app_from_url(url) app = import_proxy.get_app_from_url(url)
import_proxy.clone_to_tmp_dir(app) import_proxy.clone_to_tmp_dir(app)
self.assertEqual(app.RepoType, 'git') self.assertEqual(app.RepoType, 'git')
self.assertEqual(app.WebSite, 'https://gitlab.com/fdroid/ci-test-app')
self.assertEqual(app.Repo, 'https://gitlab.com/fdroid/ci-test-app.git') self.assertEqual(app.Repo, 'https://gitlab.com/fdroid/ci-test-app.git')
def test_get_all_gradle_and_manifests(self): def test_get_all_gradle_and_manifests(self):
@ -86,6 +85,17 @@ class ImportTest(unittest.TestCase):
subdir = import_proxy.get_gradle_subdir(build_dir, paths) subdir = import_proxy.get_gradle_subdir(build_dir, paths)
self.assertEqual(subdirs[f], subdir) self.assertEqual(subdirs[f], subdir)
def test_bad_urls(self):
for url in ('asdf',
'file://thing.git',
'https:///github.com/my/project',
'git:///so/many/slashes',
'ssh:/notabug.org/missing/a/slash',
'git:notabug.org/missing/some/slashes',
'https//github.com/bar/baz'):
with self.assertRaises(ValueError):
import_proxy.get_app_from_url(url)
def test_get_app_from_url(self): def test_get_app_from_url(self):
testdir = tempfile.mkdtemp(prefix=inspect.currentframe().f_code.co_name, dir=self.tmpdir) testdir = tempfile.mkdtemp(prefix=inspect.currentframe().f_code.co_name, dir=self.tmpdir)
os.chdir(testdir) os.chdir(testdir)