#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# stats.py - part of the FDroid server tools
# Copyright (C) 2010-13, Ciaran Gultnieks, ciaran@ciarang.com
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
import sys
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import time
|
|
|
|
import traceback
|
|
|
|
import glob
|
|
|
|
from optparse import OptionParser
|
|
|
|
import paramiko
|
2013-05-23 12:04:57 +02:00
|
|
|
import socket
|
2014-01-27 17:04:22 +01:00
|
|
|
import logging
|
2014-01-27 16:31:11 +01:00
|
|
|
import common, metadata
|
2014-02-04 08:34:40 +01:00
|
|
|
import subprocess
|
2014-02-19 09:54:24 +01:00
|
|
|
from collections import Counter
|
2013-05-23 12:04:57 +02:00
|
|
|
|
|
|
|
def carbon_send(key, value):
    """Send one metric sample to the Carbon server named in the config.

    Opens a TCP connection to (config['carbon_host'], config['carbon_port']),
    writes a single line of the form "<key> <value> <unix-timestamp>"
    terminated by a newline, and closes the connection again.

    key   -- metric name, inserted verbatim as the first field
    value -- metric value; formatted with %d, so it is sent as an integer

    Any socket error raised while connecting or sending propagates to the
    caller; the socket is closed in every case.
    """
    s = socket.socket()
    try:
        s.connect((config['carbon_host'], config['carbon_port']))
        msg = '%s %d %d\n' % (key, value, int(time.time()))
        s.sendall(msg)
    finally:
        # Always release the descriptor, even when connect/sendall fails.
        s.close()
|
2012-01-17 18:25:28 +01:00
|
|
|
|
2013-11-01 12:10:57 +01:00
|
|
|
# Module-wide state: the parsed command-line options and the loaded
# configuration. Both start out unset and are assigned inside main().
options = config = None
|
2013-10-31 16:37:39 +01:00
|
|
|
|
2012-02-26 15:18:58 +01:00
|
|
|
def _write_counts(path, counts):
    """Write every key of counts to path as one "<key> <count>" line.

    Keys are written in the mapping's own iteration order (no sorting).
    """
    with open(path, 'w') as f:
        for key in counts:
            f.write(key + ' ' + str(counts[key]) + '\n')


def main():
    """Entry point: gather statistics and write them below 'stats/'.

    Steps, in order:
      1. Parse the command line (-v/--verbose, -d/--download, --nologs)
         and load the configuration; exit with status 1 if the
         'update_stats' config entry is false.
      2. Read the app metadata and make sure the stats, stats/logs and
         stats/data directories exist.
      3. With --download, fetch over SFTP every 'access-*.log.gz' file
         that is missing locally or whose size differs. Any failure
         prints a traceback and exits with status 1.
      4. Unless --nologs is given, scan the local access logs for
         successful (status 200) '.apk' downloads and write per-app and
         per-app-version download totals; optionally push the per-app
         totals to Carbon when 'stats_to_carbon' is set.
      5. Write counts of repo types, update check modes, categories,
         anti-features and licenses, plus the list of latest apps.
      6. Log the names of any downloaded apks that were not recognised.

    Sets the module-level 'options' and 'config'.
    """
    global options, config

    # Parse command line...
    parser = OptionParser()
    parser.add_option("-v", "--verbose", action="store_true", default=False,
                      help="Spew out even more information than normal")
    parser.add_option("-d", "--download", action="store_true", default=False,
                      help="Download logs we don't have")
    parser.add_option("--nologs", action="store_true", default=False,
                      help="Don't do anything logs-related")
    options, _ = parser.parse_args()

    config = common.read_config(options)

    if not config['update_stats']:
        logging.info("Stats are disabled - check your configuration")
        sys.exit(1)

    # Get all metadata-defined apps...
    metaapps = metadata.read_metadata(options.verbose)

    statsdir = 'stats'
    logsdir = os.path.join(statsdir, 'logs')
    datadir = os.path.join(statsdir, 'data')
    # Parent first, so the two sub-directories can be created inside it.
    for directory in (statsdir, logsdir, datadir):
        if not os.path.exists(directory):
            os.mkdir(directory)

    if options.download:
        # Get any access logs we don't have...
        ssh = None
        ftp = None
        try:
            logging.info('Retrieving logs')
            ssh = paramiko.SSHClient()
            ssh.load_system_host_keys()
            ssh.connect('f-droid.org', username='fdroid', timeout=10,
                        key_filename=config['webserver_keyfile'])
            ftp = ssh.open_sftp()
            ftp.get_channel().settimeout(60)
            logging.info("...connected")

            ftp.chdir('logs')
            for f in ftp.listdir():
                if not (f.startswith('access-') and f.endswith('.log.gz')):
                    continue
                destpath = os.path.join(logsdir, f)
                destsize = ftp.stat(f).st_size
                # A size mismatch is treated as a stale or partial copy
                # and the file is fetched again.
                if (not os.path.exists(destpath) or
                        os.path.getsize(destpath) != destsize):
                    logging.info("...retrieving " + f)
                    ftp.get(f, destpath)
        except Exception:
            traceback.print_exc()
            sys.exit(1)
        finally:
            # Disconnect
            if ftp is not None:
                ftp.close()
            if ssh is not None:
                ssh.close()

    knownapks = common.KnownApks()
    unknownapks = []

    if not options.nologs:
        # Process logs
        logging.info('Processing logs...')
        appscount = Counter()
        appsvercount = Counter()
        # Raw strings: the pattern is unchanged, but the backslashes no
        # longer depend on Python leaving unknown escapes alone.
        logexpr = (r'(?P<ip>[.:0-9a-fA-F]+) - - \[(?P<time>.*?)\] '
                   r'"GET (?P<uri>.*?) HTTP/1.\d" (?P<statuscode>\d+) \d+ '
                   r'"(?P<referral>.*?)" "(?P<useragent>.*?)"')
        logsearch = re.compile(logexpr).search
        for logfile in glob.glob(os.path.join(logsdir, 'access-*.log.gz')):
            logging.info('...' + logfile)
            if options.verbose:
                print('...' + logfile)
            p = subprocess.Popen(["zcat", logfile], stdout=subprocess.PIPE)
            try:
                for line in p.stdout:
                    match = logsearch(line)
                    # Only successful requests count as downloads.
                    if not match or match.group('statuscode') != '200':
                        continue
                    uri = match.group('uri')
                    if not uri.endswith('.apk'):
                        continue
                    _, apkname = os.path.split(uri)
                    app = knownapks.getapp(apkname)
                    if app:
                        appid, _ = app
                        appscount[appid] += 1
                        # Strip the '.apk' from apkname
                        appver = apkname[:-4]
                        appsvercount[appver] += 1
                    elif apkname not in unknownapks:
                        unknownapks.append(apkname)
            finally:
                # Close the pipe and reap zcat so no child process or
                # file descriptor is left behind per log file.
                p.stdout.close()
                p.wait()

        # Calculate and write stats for total downloads...
        lst = []
        alldownloads = 0
        for appid in appscount:
            count = appscount[appid]
            # Use the id being iterated here; 'app' is only a leftover
            # from the log-parsing loop above and is not an app id.
            lst.append(appid + " " + str(count))
            if config['stats_to_carbon']:
                carbon_send('fdroid.download.' + appid.replace('.', '_'),
                            count)
            alldownloads += count
        lst.append("ALL " + str(alldownloads))
        with open('stats/total_downloads_app.txt', 'w') as f:
            f.write('# Total downloads by application, since October 2011\n')
            for line in sorted(lst):
                f.write(line + '\n')

        with open('stats/total_downloads_app_version.txt', 'w') as f:
            f.write('# Total downloads by application and version, since October 2011\n')
            lst = []
            for appver in appsvercount:
                count = appsvercount[appver]
                lst.append(appver + " " + str(count))
            for line in sorted(lst):
                f.write(line + "\n")

    # Calculate and write stats for repo types...
    logging.info("Processing repo types...")
    repotypes = Counter()
    for app in metaapps:
        if len(app['Repo Type']) == 0:
            rtype = 'none'
        elif app['Repo Type'] == 'srclib':
            # For srclib repos the count goes to whatever
            # common.getsrclibvcs() returns for the app's 'Repo' value.
            rtype = common.getsrclibvcs(app['Repo'])
        else:
            rtype = app['Repo Type']
        repotypes[rtype] += 1
    _write_counts('stats/repotypes.txt', repotypes)

    # Calculate and write stats for update check modes...
    logging.info("Processing update check modes...")
    ucms = Counter()
    for app in metaapps:
        checkmode = app['Update Check Mode']
        # Collapse parameterised modes onto their base name:
        # 'RepoManifest/<...>' -> 'RepoManifest', 'Tags <...>' -> 'Tags'.
        if checkmode.startswith('RepoManifest/'):
            checkmode = checkmode[:12]
        if checkmode.startswith('Tags '):
            checkmode = checkmode[:4]
        ucms[checkmode] += 1
    _write_counts('stats/update_check_modes.txt', ucms)

    logging.info("Processing categories...")
    ctgs = Counter()
    for app in metaapps:
        for category in app['Categories']:
            ctgs[category] += 1
    _write_counts('stats/categories.txt', ctgs)

    logging.info("Processing antifeatures...")
    afs = Counter()
    for app in metaapps:
        if app['AntiFeatures'] is None:
            continue
        # The field is a comma-separated string; count each entry.
        for antifeature in app['AntiFeatures'].split(','):
            afs[antifeature.strip()] += 1
    _write_counts('stats/antifeatures.txt', afs)

    # Calculate and write stats for licenses...
    logging.info("Processing licenses...")
    licenses = Counter()
    for app in metaapps:
        licenses[app['License']] += 1
    _write_counts('stats/licenses.txt', licenses)

    # Write list of latest apps added to the repo...
    logging.info("Processing latest apps...")
    latest = knownapks.getlatest(10)
    with open('stats/latestapps.txt', 'w') as f:
        for app in latest:
            f.write(app + '\n')

    if unknownapks:
        logging.info('\nUnknown apks:')
        for apk in unknownapks:
            logging.info(apk)

    logging.info("Finished.")
|
2012-02-26 15:18:58 +01:00
|
|
|
|
|
|
|
# Run the stats update only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
2012-01-17 18:25:28 +01:00
|
|
|
|