From 0776b440b522f2b01801c31e89b5cf4e8073f0f1 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Mon, 6 Sep 2010 12:37:52 -0500 Subject: Add new MirrorLog model and associated migration Signed-off-by: Dan McGee --- mirrors/migrations/0003_auto__add_mirrorlog.py | 72 ++++++++++++++++++++++++++ mirrors/models.py | 14 +++++ 2 files changed, 86 insertions(+) create mode 100644 mirrors/migrations/0003_auto__add_mirrorlog.py (limited to 'mirrors') diff --git a/mirrors/migrations/0003_auto__add_mirrorlog.py b/mirrors/migrations/0003_auto__add_mirrorlog.py new file mode 100644 index 00000000..5b4c225b --- /dev/null +++ b/mirrors/migrations/0003_auto__add_mirrorlog.py @@ -0,0 +1,72 @@ +# encoding: utf-8 +import datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + +class Migration(SchemaMigration): + + def forwards(self, orm): + # Adding model 'MirrorLog' + db.create_table('mirrors_mirrorlog', ( + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('url', self.gf('django.db.models.fields.related.ForeignKey')(related_name='logs', to=orm['mirrors.MirrorUrl'])), + ('check_time', self.gf('django.db.models.fields.DateTimeField')(db_index=True)), + ('last_sync', self.gf('django.db.models.fields.DateTimeField')(null=True)), + ('duration', self.gf('django.db.models.fields.FloatField')(null=True)), + ('is_success', self.gf('django.db.models.fields.BooleanField')(default=True)), + ('error', self.gf('django.db.models.fields.CharField')(default='', max_length=255, blank=True)), + )) + db.send_create_signal('mirrors', ['MirrorLog']) + + def backwards(self, orm): + # Deleting model 'MirrorLog' + db.delete_table('mirrors_mirrorlog') + + models = { + 'mirrors.mirror': { + 'Meta': {'ordering': "('country', 'name')", 'object_name': 'Mirror'}, + 'active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'admin_email': ('django.db.models.fields.EmailField', [], {'max_length': '255', 'blank': 'True'}), + 'country': ('django.db.models.fields.CharField', [], {'max_length': '255', 'db_index': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'isos': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'notes': ('django.db.models.fields.TextField', [], {'blank': 'True'}), + 'public': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'rsync_password': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '50', 'blank': 'True'}), + 'rsync_user': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '50', 'blank': 'True'}), + 'tier': ('django.db.models.fields.SmallIntegerField', [], {'default': '2'}), + 'upstream': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['mirrors.Mirror']", 'null': 'True'}) + }, + 'mirrors.mirrorlog': { + 'Meta': {'object_name': 'MirrorLog'}, + 'check_time': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}), + 'duration': ('django.db.models.fields.FloatField', [], {'null': 'True'}), + 'error': ('django.db.models.fields.CharField', [], {'default': "''", 'max_length': '255', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'is_success': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'last_sync': ('django.db.models.fields.DateTimeField', [], {'null': 'True'}), + 'url': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'logs'", 'to': "orm['mirrors.MirrorUrl']"}) + }, + 'mirrors.mirrorprotocol': { + 'Meta': {'object_name': 'MirrorProtocol'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'protocol': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '10'}) + }, + 'mirrors.mirrorrsync': { + 'Meta': {'object_name': 'MirrorRsync'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'ip': ('django.db.models.fields.CharField', [], {'max_length': '24'}), + 'mirror': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'rsync_ips'", 'to': "orm['mirrors.Mirror']"}) + }, + 'mirrors.mirrorurl': { + 'Meta': {'object_name': 'MirrorUrl'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'mirror': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'urls'", 'to': "orm['mirrors.Mirror']"}), + 'protocol': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'urls'", 'to': "orm['mirrors.MirrorProtocol']"}), + 'url': ('django.db.models.fields.CharField', [], {'max_length': '255'}) + } + } + + complete_apps = ['mirrors'] diff --git a/mirrors/models.py b/mirrors/models.py index 94256a9c..5cab9db6 100644 --- a/mirrors/models.py +++ b/mirrors/models.py @@ -54,4 +54,18 @@ class MirrorRsync(models.Model): class Meta: verbose_name = 'Mirror Rsync IP' +class MirrorLog(models.Model): + url = models.ForeignKey(MirrorUrl, related_name="logs") + check_time = models.DateTimeField(db_index=True) + last_sync = models.DateTimeField(null=True) + duration = models.FloatField(null=True) + is_success = models.BooleanField(default=True) + error = models.CharField(max_length=255, blank=True, default='') + + def __unicode__(self): + return "Check of %s at %s" % (url.url, check_time) + + class Meta: + verbose_name = 'Mirror Check Log' + # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From 7c88e3e1a2b5d36281b3981e2144c2dd16c7596c Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 10 Sep 2010 18:42:59 -0500 Subject: Rename generate mirrorlist view Signed-off-by: Dan McGee --- mirrors/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mirrors') diff --git a/mirrors/views.py b/mirrors/views.py index ddc42cbb..b4c2c2c7 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -21,7 +21,7 @@ class MirrorlistForm(forms.Form): self.fields['protocol'].initial = [t[0] for t in protos] @csrf_exempt -def generate(request): +def generate_mirrorlist(request): if request.REQUEST.get('country', ''): form = MirrorlistForm(data=request.REQUEST) if form.is_valid(): -- cgit v1.2.3-2-g168b From 3d8bc07622561028dbca9c709470accf79c95bd6 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 16 Sep 2010 16:45:30 -0500 Subject: Add 'mirrorcheck' command This does the actual work of going out and checking the mirror status. In short, it polls every active mirror URL for the 'lastsync' file and then records the appropriate details. These include the contents of that file, how long the total time to retrieve took, and any errors encountered. In order to finish up a bit faster, we spawn several threads to do the actual work. This parallelization allows the whole check process to take around 30 seconds rather than several minutes. Signed-off-by: Dan McGee --- mirrors/management/__init__.py | 0 mirrors/management/commands/__init__.py | 0 mirrors/management/commands/mirrorcheck.py | 149 +++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+) create mode 100644 mirrors/management/__init__.py create mode 100644 mirrors/management/commands/__init__.py create mode 100644 mirrors/management/commands/mirrorcheck.py (limited to 'mirrors') diff --git a/mirrors/management/__init__.py b/mirrors/management/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mirrors/management/commands/__init__.py b/mirrors/management/commands/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py new file mode 100644 index 00000000..4a933ca7 --- /dev/null +++ b/mirrors/management/commands/mirrorcheck.py @@ -0,0 +1,149 @@ +# -*- coding: utf-8 -*- +""" +mirrorcheck command + +Poll every active mirror URL we have in the database, grab the 'lastsync' file, +and record details about how long it took and how up to date the mirror is. If +we encounter errors, record those as well. + +Usage: ./manage.py mirrorcheck +""" + +from django.core.management.base import NoArgsCommand +from django.db.models import Q + +from datetime import datetime, timedelta +import logging +import re +import socket +import sys +import time +import thread +from threading import Thread +from Queue import Queue, Empty +import urllib2 + +from logging import ERROR, WARNING, INFO, DEBUG + +from mirrors.models import Mirror, MirrorUrl, MirrorLog + +logging.basicConfig( + level=WARNING, + format='%(asctime)s -> %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + stream=sys.stderr) +logger = logging.getLogger() + +class Command(NoArgsCommand): + help = "Runs a check on all known mirror URLs to determine their up-to-date status." + + def handle_noargs(self, **options): + v = int(options.get('verbosity', 0)) + if v == 0: + logger.level = ERROR + elif v == 1: + logger.level = INFO + elif v == 2: + logger.level = DEBUG + + import signal, traceback + handler = lambda sig, stack: traceback.print_stack(stack) + signal.signal(signal.SIGQUIT, handler) + signal.signal(signal.SIGUSR1, handler) + + return check_current_mirrors() + +def parse_rfc3339_datetime(time): + # '2010-09-02 11:05:06+02:00' + m = re.match('^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})([-+])(\d{2}):(\d{2})', time) + if m: + vals = m.groups() + parsed = datetime(int(vals[0]), int(vals[1]), int(vals[2]), + int(vals[3]), int(vals[4]), int(vals[5])) + # now account for time zone offset + sign = vals[6] + offset = timedelta(hours=int(sign + vals[7]), + minutes=int(sign + vals[8])) + # subtract the offset, e.g. '-04:00' should be moved up 4 hours + return parsed - offset + return None + +def check_mirror_url(mirror_url): + url = mirror_url.url + 'lastsync' + logger.info("checking URL %s" % url) + log = MirrorLog(url=mirror_url, check_time=datetime.utcnow()) + try: + start = time.time() + result = urllib2.urlopen(url, timeout=10) + data = result.read() + result.close() + end = time.time() + # lastsync should be an epoch value, but some mirrors + # are creating their own in RFC-3339 format: + # '2010-09-02 11:05:06+02:00' + try: + parsed_time = datetime.utcfromtimestamp(int(data)) + except ValueError: + parsed_time = parse_rfc3339_datetime(data) + + log.last_sync = parsed_time + log.duration = end - start + logger.debug("success: %s, %.2f" % (url, log.duration)) + except urllib2.HTTPError, e: + log.is_success = False + log.error =str(e) + logger.debug("failed: %s, %s" % (url, log.error)) + except urllib2.URLError, e: + log.is_success=False + log.error = e.reason + if isinstance(e.reason, socket.timeout): + log.error = "Connection timed out." + elif isinstance(e.reason, socket.error): + log.error = e.reason.args[1] + logger.debug("failed: %s, %s" % (url, log.error)) + + log.save() + return log + +def mirror_url_worker(queue): + while True: + try: + item = queue.get(block=False) + check_mirror_url(item) + queue.task_done() + except Empty: + return 0 + +class MirrorCheckPool(object): + def __init__(self, work, num_threads=10): + self.tasks = Queue() + for i in work: + self.tasks.put(i) + self.threads = [] + for i in range(num_threads): + thread = Thread(target=mirror_url_worker, args=(self.tasks,)) + thread.daemon = True + self.threads.append(thread) + + def run_and_join(self): + logger.debug("starting threads") + for t in self.threads: + t.start() + logger.debug("joining on all threads") + self.tasks.join() + +def check_current_mirrors(): + urls = MirrorUrl.objects.filter( + Q(protocol__protocol__iexact='HTTP') | + Q(protocol__protocol__iexact='FTP'), + mirror__active=True, mirror__public=True) + + pool = MirrorCheckPool(urls) + pool.run_and_join() + return 0 + +# For lack of a better place to put it, here is a query to get latest check +# result joined with mirror details: +# SELECT mu.*, m.*, ml.* FROM mirrors_mirrorurl mu JOIN mirrors_mirror m ON mu.mirror_id = m.id JOIN mirrors_mirrorlog ml ON mu.id = ml.url_id LEFT JOIN mirrors_mirrorlog ml2 ON ml.url_id = ml2.url_id AND ml.id < ml2.id WHERE ml2.id IS NULL AND m.active = 1 AND m.public = 1; + +# vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From 40ac4818aa7812a5399a0d4c176137984d5cfd30 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 12 Sep 2010 10:14:34 -0500 Subject: Add mirror status view Signed-off-by: Dan McGee --- mirrors/templatetags/__init__.py | 0 mirrors/templatetags/mirror_status.py | 15 +++++++++++ mirrors/views.py | 50 +++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) create mode 100644 mirrors/templatetags/__init__.py create mode 100644 mirrors/templatetags/mirror_status.py (limited to 'mirrors') diff --git a/mirrors/templatetags/__init__.py b/mirrors/templatetags/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mirrors/templatetags/mirror_status.py b/mirrors/templatetags/mirror_status.py new file mode 100644 index 00000000..09c5b331 --- /dev/null +++ b/mirrors/templatetags/mirror_status.py @@ -0,0 +1,15 @@ +from django import template + +register = template.Library() + +@register.filter +def duration(value): + if not value: + return u'\u221e' + # does not take microseconds into account + total_secs = value.seconds + value.days * 24 * 3600 + mins, secs = divmod(total_secs, 60) + hrs, mins = divmod(mins, 60) + return '%d:%02d' % (hrs, mins) + +# vim: set ts=4 sw=4 et: diff --git a/mirrors/views.py b/mirrors/views.py index b4c2c2c7..a31c1371 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -1,9 +1,14 @@ from django import forms +from django.db.models import Avg, Count, Max, Min, StdDev +from django.db.models import Q from django.views.decorators.csrf import csrf_exempt from django.views.generic.simple import direct_to_template from main.utils import make_choice from .models import Mirror, MirrorUrl, MirrorProtocol +from .models import MirrorLog + +import datetime class MirrorlistForm(forms.Form): country = forms.MultipleChoiceField(required=False) @@ -49,4 +54,49 @@ def find_mirrors(request, countries=None, protocols=None): }, mimetype='text/plain') +def status(request): + cutoff_time = datetime.datetime.utcnow() - datetime.timedelta(hours=24) + bad_timedelta = datetime.timedelta(days=3) + + protocols = MirrorProtocol.objects.exclude(protocol__iexact='rsync') + # I swear, this actually has decent performance... + urls = MirrorUrl.objects.select_related( + 'mirror', 'protocol').filter( + mirror__active=True, mirror__public=True, + protocol__in=protocols).filter( + logs__check_time__gte=cutoff_time).annotate( + check_count=Count('logs'), last_sync=Max('logs__last_sync'), + last_check=Max('logs__check_time'), + duration_avg=Avg('logs__duration'), duration_min=Min('logs__duration'), + duration_max=Max('logs__duration'), duration_stddev=StdDev('logs__duration') + ).order_by('mirror__country', 'url') + # errors during check process go in another table + error_logs = MirrorLog.objects.filter( + is_success=False, check_time__gte=cutoff_time).values( + 'url__url', 'url__protocol__protocol', 'url__mirror__country', + 'error').annotate(Count('error'), Max('check_time')) + + good_urls = [] + bad_urls = [] + for url in urls: + if url.last_check and url.last_sync: + d = url.last_check - url.last_sync + url.delay = d + url.score = d.days * 24 + d.seconds / 3600 + url.duration_avg + url.duration_stddev + else: + url.delay = None + url.score = None + # split them into good and bad lists based on delay + if not url.delay or url.delay > bad_timedelta: + bad_urls.append(url) + else: + good_urls.append(url) + + context = { + 'good_urls': good_urls, + 'bad_urls': bad_urls, + 'error_logs': error_logs, + } + return direct_to_template(request, 'mirrors/status.html', context) + # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From 1f44788e1e701f78f71ba526183313bd0c4593aa Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 16 Sep 2010 17:16:18 -0500 Subject: Add debugger when having to parse lastsync file Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorcheck.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mirrors') diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py index 4a933ca7..c2ab077f 100644 --- a/mirrors/management/commands/mirrorcheck.py +++ b/mirrors/management/commands/mirrorcheck.py @@ -84,6 +84,8 @@ def check_mirror_url(mirror_url): try: parsed_time = datetime.utcfromtimestamp(int(data)) except ValueError: + logger.debug("attempting to parse generated lastsync file" + " from mirror %s, value %s" % (url, data)) parsed_time = parse_rfc3339_datetime(data) log.last_sync = parsed_time -- cgit v1.2.3-2-g168b From b8a78408ff194ca2f822979fec5598778eff5826 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Sep 2010 17:29:45 -0500 Subject: Small updates to mirrorcheck command Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorcheck.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'mirrors') diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py index c2ab077f..1662b15c 100644 --- a/mirrors/management/commands/mirrorcheck.py +++ b/mirrors/management/commands/mirrorcheck.py @@ -42,7 +42,7 @@ class Command(NoArgsCommand): if v == 0: logger.level = ERROR elif v == 1: - logger.level = INFO + logger.level = WARNING elif v == 2: logger.level = DEBUG @@ -84,8 +84,10 @@ def check_mirror_url(mirror_url): try: parsed_time = datetime.utcfromtimestamp(int(data)) except ValueError: - logger.debug("attempting to parse generated lastsync file" - " from mirror %s, value %s" % (url, data)) + # it is bad news to try logging the lastsync value; + # sometimes we get a crazy-encoded web page. + logger.info("attempting to parse generated lastsync file" + " from mirror %s" % url) parsed_time = parse_rfc3339_datetime(data) log.last_sync = parsed_time -- cgit v1.2.3-2-g168b From 2a296af10d34c65e0f94d1a5b70c84ba31596ba4 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Sep 2010 17:30:14 -0500 Subject: Add ordering, sorting, and a lot more info to mirror status page This should get this to the point where it is releasable to the general public for their use and pleasure. Still not sure on how often the check should be run, and we probably want to incorporate this mined data into some other things like the mirror list generator. Signed-off-by: Dan McGee --- mirrors/views.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'mirrors') diff --git a/mirrors/views.py b/mirrors/views.py index a31c1371..59d6337b 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -69,12 +69,16 @@ def status(request): last_check=Max('logs__check_time'), duration_avg=Avg('logs__duration'), duration_min=Min('logs__duration'), duration_max=Max('logs__duration'), duration_stddev=StdDev('logs__duration') - ).order_by('mirror__country', 'url') + ).order_by('-last_sync', '-duration_avg') # errors during check process go in another table error_logs = MirrorLog.objects.filter( is_success=False, check_time__gte=cutoff_time).values( 'url__url', 'url__protocol__protocol', 'url__mirror__country', - 'error').annotate(Count('error'), Max('check_time')) + 'error').annotate( + error_count=Count('error'), last_occurred=Max('check_time') + ).order_by('-last_occurred', '-error_count') + + last_check = max([u.last_check for u in urls]) good_urls = [] bad_urls = [] @@ -93,6 +97,7 @@ def status(request): good_urls.append(url) context = { + 'last_check': last_check, 'good_urls': good_urls, 'bad_urls': bad_urls, 'error_logs': error_logs, -- cgit v1.2.3-2-g168b