#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# stats.py - part of the FDroid server tools
# Copyright (C) 2010-13, Ciaran Gultnieks, ciaran@ciarang.com
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import sys
import os
import re
import time
import traceback
import glob
import json
from argparse import ArgumentParser
import paramiko
import socket
import logging
import common
import metadata
import subprocess
from collections import Counter
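

# Push a single metric to a Carbon (Graphite) daemon over a plain TCP
# socket, using the plaintext protocol: "<key> <value> <unix timestamp>\n".
# e.g. carbon_send('fdroid.download.org_example_app', 42)  (illustrative)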
def carbon_send(key, value):
    s = socket.socket()
    s.connect((config['carbon_host'], config['carbon_port']))
    msg = '%s %d %d\n' % (key, value, int(time.time()))
    s.sendall(msg)
    s.close()


options = None
config = None


def main():

    global options, config

    # Parse command line...
    parser = ArgumentParser()
    common.setup_global_opts(parser)
    parser.add_argument("-d", "--download", action="store_true", default=False,
                        help="Download logs we don't have")
    parser.add_argument("--recalc", action="store_true", default=False,
                        help="Recalculate aggregate stats - use when changes "
                        "have been made that would invalidate old cached data.")
    parser.add_argument("--nologs", action="store_true", default=False,
                        help="Don't do anything logs-related")
    options = parser.parse_args()

    config = common.read_config(options)

    if not config['update_stats']:
        logging.info("Stats are disabled - set \"update_stats = True\" in your config.py")
        sys.exit(1)

    # Get all metadata-defined apps...
    allmetaapps = [app for app in metadata.read_metadata().itervalues()]
    metaapps = [app for app in allmetaapps if not app.Disabled]
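
    # Everything below works inside ./stats: downloaded logs go to
    # stats/logs, per-day aggregates are cached as stats/data/<date>.json,
    # and the generated *.txt reports land directly in stats/.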
    statsdir = 'stats'
    logsdir = os.path.join(statsdir, 'logs')
    datadir = os.path.join(statsdir, 'data')
    if not os.path.exists(statsdir):
        os.mkdir(statsdir)
    if not os.path.exists(logsdir):
        os.mkdir(logsdir)
    if not os.path.exists(datadir):
        os.mkdir(datadir)
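
    # With --download, mirror the server's gzipped access logs over SFTP
    # (via paramiko), skipping any file whose size already matches the
    # local copy.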
    if options.download:
        # Get any access logs we don't have...
        ssh = None
        ftp = None
        try:
            logging.info('Retrieving logs')
            ssh = paramiko.SSHClient()
            ssh.load_system_host_keys()
            ssh.connect(config['stats_server'], username=config['stats_user'],
                        timeout=10, key_filename=config['webserver_keyfile'])
            ftp = ssh.open_sftp()
            ftp.get_channel().settimeout(60)
            logging.info("...connected")

            ftp.chdir('logs')
            files = ftp.listdir()
            for f in files:
                if f.startswith('access-') and f.endswith('.log.gz'):
                    destpath = os.path.join(logsdir, f)
                    destsize = ftp.stat(f).st_size
                    if (not os.path.exists(destpath) or
                            os.path.getsize(destpath) != destsize):
                        logging.debug("...retrieving " + f)
                        ftp.get(f, destpath)
        except Exception:
            traceback.print_exc()
            sys.exit(1)
        finally:
            # Disconnect
            if ftp is not None:
                ftp.close()
            if ssh is not None:
                ssh.close()
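
    # knownapks is the index used both to map apk filenames back to
    # application ids (getapp, below) and to list the most recently added
    # apps (getlatest, at the end).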
    knownapks = common.KnownApks()
    unknownapks = []

    if not options.nologs:
        # Process logs
        logging.info('Processing logs...')
        appscount = Counter()
        appsvercount = Counter()
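        # Each access-log line is matched against what looks like the
        # standard Apache/nginx "combined" log format; only the ip, uri and
        # statuscode groups are actually used below. A matching line looks
        # roughly like (illustrative):
        #   1.2.3.4 - - [28/Feb/2012:00:00:00 +0000] "GET /repo/org.example.app_42.apk HTTP/1.1" 200 1234 "-" "SomeAgent"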
        logexpr = '(?P<ip>[.:0-9a-fA-F]+) - - \[(?P<time>.*?)\] ' + \
            '"GET (?P<uri>.*?) HTTP/1.\d" (?P<statuscode>\d+) ' + \
            '\d+ "(?P<referral>.*?)" "(?P<useragent>.*?)"'
        logsearch = re.compile(logexpr).search
        for logfile in glob.glob(os.path.join(logsdir, 'access-*.log.gz')):
            logging.debug('...' + logfile)

            # Get the date for this log - e.g. 2012-02-28 - i.e. the
            # filename with the 'access-' prefix and '.log.gz' suffix
            # stripped off
            thisdate = os.path.basename(logfile)[7:-7]

            agg_path = os.path.join(datadir, thisdate + '.json')
            if not options.recalc and os.path.exists(agg_path):
                # Use previously calculated aggregate data
                with open(agg_path, 'r') as f:
                    today = json.load(f)

            else:
                # Calculate from logs...

                today = {
                    'apps': Counter(),
                    'appsver': Counter(),
                    'unknown': []
                }
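
                # Stream the gzipped log through zcat and count only
                # successful (HTTP 200) GETs of .apk files from IPs not
                # listed in config['stats_ignore']; each apk filename is
                # mapped back to its application id via knownapks.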
                p = subprocess.Popen(["zcat", logfile], stdout=subprocess.PIPE)
                matches = (logsearch(line) for line in p.stdout)
                for match in matches:
                    if not match:
                        continue
                    if match.group('statuscode') != '200':
                        continue
                    if match.group('ip') in config['stats_ignore']:
                        continue
                    uri = match.group('uri')
                    if not uri.endswith('.apk'):
                        continue
                    _, apkname = os.path.split(uri)
                    app = knownapks.getapp(apkname)
                    if app:
                        appid, _ = app
                        today['apps'][appid] += 1
                        # Strip the '.apk' from apkname
                        appver = apkname[:-4]
                        today['appsver'][appver] += 1
                    else:
                        if apkname not in today['unknown']:
                            today['unknown'].append(apkname)

                # Save calculated aggregate data for today to cache
                with open(agg_path, 'w') as f:
                    json.dump(today, f)

            # Add today's stats (whether cached or recalculated) to the total
            for appid in today['apps']:
                appscount[appid] += today['apps'][appid]
            for appid in today['appsver']:
                appsvercount[appid] += today['appsver'][appid]
            for uk in today['unknown']:
                if uk not in unknownapks:
                    unknownapks.append(uk)

        # Calculate and write stats for total downloads...
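        # Write one "<appid> <count>" line per app, plus a final
        # "ALL <total>" line; sorting is plain alphabetical on the text.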
        lst = []
        alldownloads = 0
        for appid in appscount:
            count = appscount[appid]
            lst.append(appid + " " + str(count))
            if config['stats_to_carbon']:
                carbon_send('fdroid.download.' + appid.replace('.', '_'),
                            count)
            alldownloads += count
        lst.append("ALL " + str(alldownloads))

        with open(os.path.join(statsdir, 'total_downloads_app.txt'), 'w') as f:
            f.write('# Total downloads by application, since October 2011\n')
            for line in sorted(lst):
                f.write(line + '\n')

        lst = []
        for appver in appsvercount:
            count = appsvercount[appver]
            lst.append(appver + " " + str(count))

        with open(os.path.join(statsdir, 'total_downloads_app_version.txt'), 'w') as f:
            f.write('# Total downloads by application and version, '
                    'since October 2011\n')
            for line in sorted(lst):
                f.write(line + "\n")
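
    # The remaining reports are derived from the app metadata alone, so
    # they are produced even when --nologs is given.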

    # Calculate and write stats for repo types...
    logging.info("Processing repo types...")
    repotypes = Counter()
    for app in metaapps:
        rtype = app.RepoType or 'none'
        if rtype == 'srclib':
            rtype = common.getsrclibvcs(app.Repo)
        repotypes[rtype] += 1
    with open(os.path.join(statsdir, 'repotypes.txt'), 'w') as f:
        for rtype, count in repotypes.most_common():
            f.write(rtype + ' ' + str(count) + '\n')

    # Calculate and write stats for update check modes...
    logging.info("Processing update check modes...")
    ucms = Counter()
    for app in metaapps:
        checkmode = app.UpdateCheckMode
        # Collapse 'RepoManifest/<branch>' and 'Tags <pattern>' variants
        # into their base modes
        if checkmode.startswith('RepoManifest/'):
            checkmode = checkmode[:12]
        if checkmode.startswith('Tags '):
            checkmode = checkmode[:4]
        ucms[checkmode] += 1
    with open(os.path.join(statsdir, 'update_check_modes.txt'), 'w') as f:
        for checkmode, count in ucms.most_common():
            f.write(checkmode + ' ' + str(count) + '\n')

    logging.info("Processing categories...")
    ctgs = Counter()
    for app in metaapps:
        for category in app.Categories:
            ctgs[category] += 1
    with open(os.path.join(statsdir, 'categories.txt'), 'w') as f:
        for category, count in ctgs.most_common():
            f.write(category + ' ' + str(count) + '\n')

    logging.info("Processing antifeatures...")
    afs = Counter()
    for app in metaapps:
        if app.AntiFeatures is None:
            continue
        for antifeature in app.AntiFeatures:
            afs[antifeature] += 1
    with open(os.path.join(statsdir, 'antifeatures.txt'), 'w') as f:
        for antifeature, count in afs.most_common():
            f.write(antifeature + ' ' + str(count) + '\n')

    # Calculate and write stats for licenses...
    logging.info("Processing licenses...")
    licenses = Counter()
    for app in metaapps:
        license = app.License
        licenses[license] += 1
    with open(os.path.join(statsdir, 'licenses.txt'), 'w') as f:
        for license, count in licenses.most_common():
            f.write(license + ' ' + str(count) + '\n')

    # Write list of disabled apps...
    logging.info("Processing disabled apps...")
    disabled = [app.id for app in allmetaapps if app.Disabled]
    with open(os.path.join(statsdir, 'disabled_apps.txt'), 'w') as f:
        for appid in sorted(disabled):
            f.write(appid + '\n')

    # Write list of latest apps added to the repo...
    logging.info("Processing latest apps...")
    latest = knownapks.getlatest(10)
    with open(os.path.join(statsdir, 'latestapps.txt'), 'w') as f:
        for appid in latest:
            f.write(appid + '\n')

    if unknownapks:
        logging.info('\nUnknown apks:')
        for apk in unknownapks:
            logging.info(apk)

    logging.info("Finished.")

if __name__ == "__main__":
    main()