diff options
author | Dan McGee <dan@archlinux.org> | 2010-09-21 17:39:46 -0500 |
---|---|---|
committer | Dan McGee <dan@archlinux.org> | 2010-09-21 17:39:46 -0500 |
commit | b3883820a249a0bb6ba6237f815b8cdffd630fcd (patch) | |
tree | 5c17812e1dde0c2885bf3520f435debc3e1122c9 /mirrors | |
parent | 4a99d313bfb2a226e6777a39a9a8588106f42685 (diff) | |
parent | 8ff8190c5ca29473cbcc398fb12b33b4430cc050 (diff) |
Merge branch 'mirror-check'
Diffstat (limited to 'mirrors')
-rw-r--r-- | mirrors/management/__init__.py | 0 | ||||
-rw-r--r-- | mirrors/management/commands/__init__.py | 0 | ||||
-rw-r--r-- | mirrors/management/commands/mirrorcheck.py | 153 | ||||
-rw-r--r-- | mirrors/migrations/0003_auto__add_mirrorlog.py | 72 | ||||
-rw-r--r-- | mirrors/models.py | 14 | ||||
-rw-r--r-- | mirrors/templatetags/__init__.py | 0 | ||||
-rw-r--r-- | mirrors/templatetags/mirror_status.py | 15 | ||||
-rw-r--r-- | mirrors/views.py | 57 |
8 files changed, 310 insertions, 1 deletion
diff --git a/mirrors/management/__init__.py b/mirrors/management/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/mirrors/management/__init__.py diff --git a/mirrors/management/commands/__init__.py b/mirrors/management/commands/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/mirrors/management/commands/__init__.py diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py new file mode 100644 index 00000000..1662b15c --- /dev/null +++ b/mirrors/management/commands/mirrorcheck.py @@ -0,0 +1,153 @@ +# -*- coding: utf-8 -*- +""" +mirrorcheck command + +Poll every active mirror URL we have in the database, grab the 'lastsync' file, +and record details about how long it took and how up to date the mirror is. If +we encounter errors, record those as well. + +Usage: ./manage.py mirrorcheck +""" + +from django.core.management.base import NoArgsCommand +from django.db.models import Q + +from datetime import datetime, timedelta +import logging +import re +import socket +import sys +import time +import thread +from threading import Thread +from Queue import Queue, Empty +import urllib2 + +from logging import ERROR, WARNING, INFO, DEBUG + +from mirrors.models import Mirror, MirrorUrl, MirrorLog + +logging.basicConfig( + level=WARNING, + format='%(asctime)s -> %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + stream=sys.stderr) +logger = logging.getLogger() + +class Command(NoArgsCommand): + help = "Runs a check on all known mirror URLs to determine their up-to-date status." 
+ + def handle_noargs(self, **options): + v = int(options.get('verbosity', 0)) + if v == 0: + logger.level = ERROR + elif v == 1: + logger.level = WARNING + elif v == 2: + logger.level = DEBUG + + import signal, traceback + handler = lambda sig, stack: traceback.print_stack(stack) + signal.signal(signal.SIGQUIT, handler) + signal.signal(signal.SIGUSR1, handler) + + return check_current_mirrors() + +def parse_rfc3339_datetime(time): + # '2010-09-02 11:05:06+02:00' + m = re.match('^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})([-+])(\d{2}):(\d{2})', time) + if m: + vals = m.groups() + parsed = datetime(int(vals[0]), int(vals[1]), int(vals[2]), + int(vals[3]), int(vals[4]), int(vals[5])) + # now account for time zone offset + sign = vals[6] + offset = timedelta(hours=int(sign + vals[7]), + minutes=int(sign + vals[8])) + # subtract the offset, e.g. '-04:00' should be moved up 4 hours + return parsed - offset + return None + +def check_mirror_url(mirror_url): + url = mirror_url.url + 'lastsync' + logger.info("checking URL %s" % url) + log = MirrorLog(url=mirror_url, check_time=datetime.utcnow()) + try: + start = time.time() + result = urllib2.urlopen(url, timeout=10) + data = result.read() + result.close() + end = time.time() + # lastsync should be an epoch value, but some mirrors + # are creating their own in RFC-3339 format: + # '2010-09-02 11:05:06+02:00' + try: + parsed_time = datetime.utcfromtimestamp(int(data)) + except ValueError: + # it is bad news to try logging the lastsync value; + # sometimes we get a crazy-encoded web page. 
+ logger.info("attempting to parse generated lastsync file" + " from mirror %s" % url) + parsed_time = parse_rfc3339_datetime(data) + + log.last_sync = parsed_time + log.duration = end - start + logger.debug("success: %s, %.2f" % (url, log.duration)) + except urllib2.HTTPError, e: + log.is_success = False + log.error =str(e) + logger.debug("failed: %s, %s" % (url, log.error)) + except urllib2.URLError, e: + log.is_success=False + log.error = e.reason + if isinstance(e.reason, socket.timeout): + log.error = "Connection timed out." + elif isinstance(e.reason, socket.error): + log.error = e.reason.args[1] + logger.debug("failed: %s, %s" % (url, log.error)) + + log.save() + return log + +def mirror_url_worker(queue): + while True: + try: + item = queue.get(block=False) + check_mirror_url(item) + queue.task_done() + except Empty: + return 0 + +class MirrorCheckPool(object): + def __init__(self, work, num_threads=10): + self.tasks = Queue() + for i in work: + self.tasks.put(i) + self.threads = [] + for i in range(num_threads): + thread = Thread(target=mirror_url_worker, args=(self.tasks,)) + thread.daemon = True + self.threads.append(thread) + + def run_and_join(self): + logger.debug("starting threads") + for t in self.threads: + t.start() + logger.debug("joining on all threads") + self.tasks.join() + +def check_current_mirrors(): + urls = MirrorUrl.objects.filter( + Q(protocol__protocol__iexact='HTTP') | + Q(protocol__protocol__iexact='FTP'), + mirror__active=True, mirror__public=True) + + pool = MirrorCheckPool(urls) + pool.run_and_join() + return 0 + +# For lack of a better place to put it, here is a query to get latest check +# result joined with mirror details: +# SELECT mu.*, m.*, ml.* FROM mirrors_mirrorurl mu JOIN mirrors_mirror m ON mu.mirror_id = m.id JOIN mirrors_mirrorlog ml ON mu.id = ml.url_id LEFT JOIN mirrors_mirrorlog ml2 ON ml.url_id = ml2.url_id AND ml.id < ml2.id WHERE ml2.id IS NULL AND m.active = 1 AND m.public = 1; + +# vim: set ts=4 sw=4 et: diff 
--git a/mirrors/migrations/0003_auto__add_mirrorlog.py b/mirrors/migrations/0003_auto__add_mirrorlog.py new file mode 100644 index 00000000..5b4c225b --- /dev/null +++ b/mirrors/migrations/0003_auto__add_mirrorlog.py @@ -0,0 +1,72 @@ +# encoding: utf-8 +import datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Adding model 'MirrorLog' + db.create_table('mirrors_mirrorlog', ( + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('url', self.gf('django.db.models.fields.related.ForeignKey')(related_name='logs', to=orm['mirrors.MirrorUrl'])), + ('check_time', self.gf('django.db.models.fields.DateTimeField')(db_index=True)), + ('last_sync', self.gf('django.db.models.fields.DateTimeField')(null=True)), + ('duration', self.gf('django.db.models.fields.FloatField')(null=True)), + ('is_success', self.gf('django.db.models.fields.BooleanField')(default=True)), + ('error', self.gf('django.db.models.fields.CharField')(default='', max_length=255, blank=True)), + )) + db.send_create_signal('mirrors', ['MirrorLog']) + + def backwards(self, orm): + # Deleting model 'MirrorLog' + db.delete_table('mirrors_mirrorlog') + + models = { + 'mirrors.mirror': { + 'Meta': {'ordering': "('country', 'name')", 'object_name': 'Mirror'}, + 'active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'admin_email': ('django.db.models.fields.EmailField', [], {'max_length': '255', 'blank': 'True'}), + 'country': ('django.db.models.fields.CharField', [], {'max_length': '255', 'db_index': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'isos': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'notes': ('django.db.models.fields.TextField', [], {'blank': 'True'}), + 'public': 
('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'rsync_password': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '50', 'blank': 'True'}), + 'rsync_user': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '50', 'blank': 'True'}), + 'tier': ('django.db.models.fields.SmallIntegerField', [], {'default': '2'}), + 'upstream': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['mirrors.Mirror']", 'null': 'True'}) + }, + 'mirrors.mirrorlog': { + 'Meta': {'object_name': 'MirrorLog'}, + 'check_time': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}), + 'duration': ('django.db.models.fields.FloatField', [], {'null': 'True'}), + 'error': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '255', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'is_success': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'last_sync': ('django.db.models.fields.DateTimeField', [], {'null': 'True'}), + 'url': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'logs'", 'to': "orm['mirrors.MirrorUrl']"}) + }, + 'mirrors.mirrorprotocol': { + 'Meta': {'object_name': 'MirrorProtocol'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'protocol': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '10'}) + }, + 'mirrors.mirrorrsync': { + 'Meta': {'object_name': 'MirrorRsync'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'ip': ('django.db.models.fields.CharField', [], {'max_length': '24'}), + 'mirror': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'rsync_ips'", 'to': "orm['mirrors.Mirror']"}) + }, + 'mirrors.mirrorurl': { + 'Meta': {'object_name': 'MirrorUrl'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'mirror': 
('django.db.models.fields.related.ForeignKey', [], {'related_name': "'urls'", 'to': "orm['mirrors.Mirror']"}), + 'protocol': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'urls'", 'to': "orm['mirrors.MirrorProtocol']"}), + 'url': ('django.db.models.fields.CharField', [], {'max_length': '255'}) + } + } + + complete_apps = ['mirrors'] diff --git a/mirrors/models.py b/mirrors/models.py index 94256a9c..5cab9db6 100644 --- a/mirrors/models.py +++ b/mirrors/models.py @@ -54,4 +54,18 @@ class MirrorRsync(models.Model): class Meta: verbose_name = 'Mirror Rsync IP' +class MirrorLog(models.Model): + url = models.ForeignKey(MirrorUrl, related_name="logs") + check_time = models.DateTimeField(db_index=True) + last_sync = models.DateTimeField(null=True) + duration = models.FloatField(null=True) + is_success = models.BooleanField(default=True) + error = models.CharField(max_length=255, blank=True, default='') + + def __unicode__(self): + return "Check of %s at %s" % (url.url, check_time) + + class Meta: + verbose_name = 'Mirror Check Log' + # vim: set ts=4 sw=4 et: diff --git a/mirrors/templatetags/__init__.py b/mirrors/templatetags/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/mirrors/templatetags/__init__.py diff --git a/mirrors/templatetags/mirror_status.py b/mirrors/templatetags/mirror_status.py new file mode 100644 index 00000000..09c5b331 --- /dev/null +++ b/mirrors/templatetags/mirror_status.py @@ -0,0 +1,15 @@ +from django import template + +register = template.Library() + +@register.filter +def duration(value): + if not value: + return u'\u221e' + # does not take microseconds into account + total_secs = value.seconds + value.days * 24 * 3600 + mins, secs = divmod(total_secs, 60) + hrs, mins = divmod(mins, 60) + return '%d:%02d' % (hrs, mins) + +# vim: set ts=4 sw=4 et: diff --git a/mirrors/views.py b/mirrors/views.py index ddc42cbb..59d6337b 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -1,9 +1,14 @@ from 
django import forms +from django.db.models import Avg, Count, Max, Min, StdDev +from django.db.models import Q from django.views.decorators.csrf import csrf_exempt from django.views.generic.simple import direct_to_template from main.utils import make_choice from .models import Mirror, MirrorUrl, MirrorProtocol +from .models import MirrorLog + +import datetime class MirrorlistForm(forms.Form): country = forms.MultipleChoiceField(required=False) @@ -21,7 +26,7 @@ class MirrorlistForm(forms.Form): self.fields['protocol'].initial = [t[0] for t in protos] @csrf_exempt -def generate(request): +def generate_mirrorlist(request): if request.REQUEST.get('country', ''): form = MirrorlistForm(data=request.REQUEST) if form.is_valid(): @@ -49,4 +54,54 @@ def find_mirrors(request, countries=None, protocols=None): }, mimetype='text/plain') +def status(request): + cutoff_time = datetime.datetime.utcnow() - datetime.timedelta(hours=24) + bad_timedelta = datetime.timedelta(days=3) + + protocols = MirrorProtocol.objects.exclude(protocol__iexact='rsync') + # I swear, this actually has decent performance... 
+ urls = MirrorUrl.objects.select_related( + 'mirror', 'protocol').filter( + mirror__active=True, mirror__public=True, + protocol__in=protocols).filter( + logs__check_time__gte=cutoff_time).annotate( + check_count=Count('logs'), last_sync=Max('logs__last_sync'), + last_check=Max('logs__check_time'), + duration_avg=Avg('logs__duration'), duration_min=Min('logs__duration'), + duration_max=Max('logs__duration'), duration_stddev=StdDev('logs__duration') + ).order_by('-last_sync', '-duration_avg') + # errors during check process go in another table + error_logs = MirrorLog.objects.filter( + is_success=False, check_time__gte=cutoff_time).values( + 'url__url', 'url__protocol__protocol', 'url__mirror__country', + 'error').annotate( + error_count=Count('error'), last_occurred=Max('check_time') + ).order_by('-last_occurred', '-error_count') + + last_check = max([u.last_check for u in urls]) + + good_urls = [] + bad_urls = [] + for url in urls: + if url.last_check and url.last_sync: + d = url.last_check - url.last_sync + url.delay = d + url.score = d.days * 24 + d.seconds / 3600 + url.duration_avg + url.duration_stddev + else: + url.delay = None + url.score = None + # split them into good and bad lists based on delay + if not url.delay or url.delay > bad_timedelta: + bad_urls.append(url) + else: + good_urls.append(url) + + context = { + 'last_check': last_check, + 'good_urls': good_urls, + 'bad_urls': bad_urls, + 'error_logs': error_logs, + } + return direct_to_template(request, 'mirrors/status.html', context) + # vim: set ts=4 sw=4 et: |