From 7c26f6b7a4d29faede58d2feb13ef961e4725637 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 19 Oct 2014 14:08:15 -0500 Subject: Use raw DB query to fetch last modified date Signed-off-by: Dan McGee --- mirrors/views.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'mirrors') diff --git a/mirrors/views.py b/mirrors/views.py index 26b5b802..55c40c4d 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -6,6 +6,7 @@ from operator import attrgetter, itemgetter from django import forms from django.forms.widgets import CheckboxSelectMultiple from django.core.serializers.json import DjangoJSONEncoder +from django.db import connection from django.db.models import Q from django.http import Http404, HttpResponse from django.shortcuts import get_object_or_404, redirect, render @@ -222,7 +223,9 @@ def url_details(request, name, url_id): def status_last_modified(request, *args, **kwargs): - return MirrorLog.objects.values_list('check_time', flat=True).latest() + cursor = connection.cursor() + cursor.execute("SELECT MAX(check_time) FROM mirrors_mirrorlog") + return cursor.fetchone()[0] @condition(last_modified_func=status_last_modified) -- cgit v1.2.3-2-g168b From ee6bf2782068b917232c71189aea0011b47e876d Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 09:10:28 -0500 Subject: Small performance tweaks to mirror status JSON encoding Do a few things to speed up the encoding of the JSON, including better usage of list comprehensions, less dynamic setattr() usage, and removal of the queryset specialization since we can easily do it outside of the encoder. Signed-off-by: Dan McGee --- mirrors/utils.py | 30 ++++++++++++++---------------- mirrors/views.py | 7 ++----- 2 files changed, 16 insertions(+), 21 deletions(-) (limited to 'mirrors') diff --git a/mirrors/utils.py b/mirrors/utils.py index fe18cd6a..4484fa24 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -84,19 +84,16 @@ GROUP BY l.url_id, u.mirror_id def annotate_url(url, url_data): '''Given a MirrorURL object, add a few more attributes to it regarding status, including completion_pct, delay, and score.''' - known_attrs = ( - ('success_count', 0), - ('check_count', 0), - ('completion_pct', None), - ('duration_avg', None), - ('duration_stddev', None), - ('last_check', None), - ('last_sync', None), - ('delay', None), - ('score', None), - ) - for k, v in known_attrs: - setattr(url, k, v) + # set up some sane default values in case we are missing anything + url.success_count = 0 + url.check_count = 0 + url.completion_pct = None + url.duration_avg = None + url.duration_stddev = None + url.last_check = None + url.last_sync = None + url.delay = None + url.score = None for k, v in url_data.items(): if k not in ('url_id', 'mirror_id'): setattr(url, k, v) @@ -107,7 +104,7 @@ def annotate_url(url, url_data): if url.delay is not None: hours = url.delay.days * 24.0 + url.delay.seconds / 3600.0 - if url.completion_pct > 0: + if url.completion_pct > 0.0: divisor = url.completion_pct else: # arbitrary small value @@ -115,6 +112,8 @@ def annotate_url(url, url_data): stddev = url.duration_stddev or 0.0 url.score = (hours + url.duration_avg + stddev) / divisor + return url + def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): cutoff_time = now() - cutoff @@ -133,8 +132,7 @@ def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): id__in=valid_urls).order_by('mirror__id', 'url') if urls: - for url in urls: - annotate_url(url, url_data.get(url.id, {})) + urls = [annotate_url(url, url_data.get(url.id, {})) for url in urls] last_check = max([u.last_check for u in urls if u.last_check]) num_checks = max([u.check_count for u in urls]) check_info = MirrorLog.objects.filter(check_time__gte=cutoff_time) diff --git a/mirrors/views.py b/mirrors/views.py index 55c40c4d..0bf0a267 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -275,9 +275,6 @@ class MirrorStatusJSONEncoder(DjangoJSONEncoder): if isinstance(obj, timedelta): # always returned as integer seconds return obj.days * 24 * 3600 + obj.seconds - if hasattr(obj, '__iter__'): - # mainly for queryset serialization - return list(obj) if isinstance(obj, MirrorUrl): data = {attr: getattr(obj, attr) for attr in self.url_attributes} country = obj.country @@ -298,8 +295,8 @@ class ExtendedMirrorStatusJSONEncoder(MirrorStatusJSONEncoder): if isinstance(obj, MirrorUrl): data = super(ExtendedMirrorStatusJSONEncoder, self).default(obj) cutoff = now() - DEFAULT_CUTOFF - data['logs'] = obj.logs.filter( - check_time__gte=cutoff).order_by('check_time') + data['logs'] = list(obj.logs.filter( + check_time__gte=cutoff).order_by('check_time')) return data if isinstance(obj, MirrorLog): return {attr: getattr(obj, attr) for attr in self.log_attributes} -- cgit v1.2.3-2-g168b From 087b4b00031fed52eeddf05ae36825cb498680f0 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 09:11:48 -0500 Subject: Remove queryset specialization in JSON encoder Signed-off-by: Dan McGee --- mirrors/views.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'mirrors') diff --git a/mirrors/views.py b/mirrors/views.py index 0bf0a267..1a9741ed 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -323,9 +323,6 @@ class LocationJSONEncoder(DjangoJSONEncoder): '''Base JSONEncoder extended to handle CheckLocation objects.''' def default(self, obj): - if hasattr(obj, '__iter__'): - # mainly for queryset serialization - return list(obj) if isinstance(obj, CheckLocation): return { 'id': obj.pk, @@ -341,7 +338,7 @@ class LocationJSONEncoder(DjangoJSONEncoder): def locations_json(request): data = {} data['version'] = 1 - data['locations'] = CheckLocation.objects.all().order_by('pk') + data['locations'] = list(CheckLocation.objects.all().order_by('pk')) to_json = json.dumps(data, ensure_ascii=False, cls=LocationJSONEncoder) response = HttpResponse(to_json, content_type='application/json') return response -- cgit v1.2.3-2-g168b From 86fd0b722afb53670ef9a155a3c55d688f275c6d Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 09:39:06 -0500 Subject: Reduce complexity of status data URL query Get rid of all the junk trying to only return URLs that have been checked in the last 24 hours; it just isn't worth it. Instead, do that screening only in the views that need it, namely the HTML status page. Signed-off-by: Dan McGee --- mirrors/utils.py | 14 +++++--------- mirrors/views.py | 5 ++++- 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'mirrors') diff --git a/mirrors/utils.py b/mirrors/utils.py index 4484fa24..8edceb9b 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -118,20 +118,16 @@ def annotate_url(url, url_data): def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): cutoff_time = now() - cutoff - valid_urls = MirrorUrl.objects.filter( - logs__check_time__gte=cutoff_time).distinct() - + urls = MirrorUrl.objects.select_related( + 'mirror', 'protocol').order_by('mirror__id', 'url') if mirror_id: - valid_urls = valid_urls.filter(mirror_id=mirror_id) + urls = urls.filter(mirror_id=mirror_id) if not show_all: - valid_urls = valid_urls.filter(active=True, mirror__active=True, + urls = urls.filter(active=True, mirror__active=True, mirror__public=True) - url_data = status_data(cutoff, mirror_id) - urls = MirrorUrl.objects.select_related('mirror', 'protocol').filter( - id__in=valid_urls).order_by('mirror__id', 'url') - if urls: + url_data = status_data(cutoff, mirror_id) urls = [annotate_url(url, url_data.get(url.id, {})) for url in urls] last_check = max([u.last_check for u in urls if u.last_check]) num_checks = max([u.check_count for u in urls]) diff --git a/mirrors/views.py b/mirrors/views.py index 1a9741ed..90787763 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -245,7 +245,10 @@ def status(request, tier=None): if tier is not None and url.mirror.tier != tier: continue # split them into good and bad lists based on delay - if not url.delay or url.delay > bad_timedelta: + if url.completion_pct is None: + # skip URLs that have never been checked + continue + elif not url.delay or url.delay > bad_timedelta: bad_urls.append(url) else: good_urls.append(url) -- cgit v1.2.3-2-g168b From 48509bfdbdadb8255f32c56d993f91262516916f Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 10:00:48 -0500 Subject: Simplify/clean-up finding of download mirror Signed-off-by: Dan McGee --- mirrors/utils.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'mirrors') diff --git a/mirrors/utils.py b/mirrors/utils.py index 8edceb9b..fb867dee 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -189,23 +189,21 @@ def get_mirror_url_for_download(cutoff=DEFAULT_CUTOFF): if status_data['check_time__max'] is not None: min_check_time = status_data['check_time__max'] - timedelta(minutes=5) min_sync_time = status_data['last_sync__max'] - timedelta(minutes=20) - best_logs = MirrorLog.objects.filter(is_success=True, + best_logs = MirrorLog.objects.select_related('url').filter( + is_success=True, check_time__gte=min_check_time, last_sync__gte=min_sync_time, url__active=True, url__mirror__public=True, url__mirror__active=True, url__protocol__default=True).order_by( 'duration')[:1] if best_logs: - return MirrorUrl.objects.get(id=best_logs[0].url_id) + return best_logs[0].url mirror_urls = MirrorUrl.objects.filter(active=True, - mirror__public=True, mirror__active=True, protocol__default=True) - # look first for a country-agnostic URL, then fall back to any HTTP URL - filtered_urls = mirror_urls.filter(country='')[:1] - if not filtered_urls: - filtered_urls = mirror_urls[:1] - if not filtered_urls: + mirror__public=True, mirror__active=True, + protocol__default=True)[:1] + if not mirror_urls: return None - return filtered_urls[0] + return mirror_urls[0] # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From f53ea0b102d0251a98116aad445d55570c71931c Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 10:01:53 -0500 Subject: Move caching of function data back to get_mirror_statuses We've moved this around a few times, including changing the parameters to ensure they are stable (commit bdfa22500f4). However, the bulk of the work takes place in the mashing up of the data, so cache the full result rather than just the result of a single query. Signed-off-by: Dan McGee --- mirrors/utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'mirrors') diff --git a/mirrors/utils.py b/mirrors/utils.py index fb867dee..5685c9c8 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -21,9 +21,7 @@ def dictfetchall(cursor): for row in cursor.fetchall() ] -@cache_function(178) -def status_data(cutoff=DEFAULT_CUTOFF, mirror_id=None): - cutoff_time = now() - cutoff +def status_data(cutoff_time, mirror_id=None): if mirror_id is not None: params = [cutoff_time, mirror_id] mirror_where = 'AND u.mirror_id = %s' @@ -115,6 +113,7 @@ def annotate_url(url, url_data): return url +@cache_function(178) def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): cutoff_time = now() - cutoff @@ -127,7 +126,7 @@ def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): mirror__public=True) if urls: - url_data = status_data(cutoff, mirror_id) + url_data = status_data(cutoff_time, mirror_id) urls = [annotate_url(url, url_data.get(url.id, {})) for url in urls] last_check = max([u.last_check for u in urls if u.last_check]) num_checks = max([u.check_count for u in urls]) -- cgit v1.2.3-2-g168b From c86ef5c326212f09a22f5ae3502a0bc79033a23a Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 17:08:56 -0500 Subject: Use cache_page on mirror status JSON Signed-off-by: Dan McGee --- mirrors/views.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mirrors') diff --git a/mirrors/views.py b/mirrors/views.py index 90787763..c2736da8 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -11,6 +11,7 @@ from django.db.models import Q from django.http import Http404, HttpResponse from django.shortcuts import get_object_or_404, redirect, render from django.utils.timezone import now +from django.views.decorators.cache import cache_page from django.views.decorators.csrf import csrf_exempt from django.views.decorators.http import condition from django_countries.data import COUNTRIES @@ -306,6 +307,7 @@ class ExtendedMirrorStatusJSONEncoder(MirrorStatusJSONEncoder): return super(ExtendedMirrorStatusJSONEncoder, self).default(obj) +@cache_page(67) @condition(last_modified_func=status_last_modified) def status_json(request, tier=None): if tier is not None: -- cgit v1.2.3-2-g168b From 69d771978bcf7d70d106c3e704fde203451fd48e Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 17:32:29 -0500 Subject: Fix 500 when no URLs have been checked Signed-off-by: Dan McGee --- mirrors/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mirrors') diff --git a/mirrors/utils.py b/mirrors/utils.py index 5685c9c8..51daf50e 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -128,8 +128,8 @@ def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): if urls: url_data = status_data(cutoff_time, mirror_id) urls = [annotate_url(url, url_data.get(url.id, {})) for url in urls] - last_check = max([u.last_check for u in urls if u.last_check]) - num_checks = max([u.check_count for u in urls]) + last_check = max([u.last_check for u in urls if u.last_check] or [None]) + num_checks = max(u.check_count for u in urls) check_info = MirrorLog.objects.filter(check_time__gte=cutoff_time) if mirror_id: check_info = check_info.filter(url__mirror_id=mirror_id) -- cgit v1.2.3-2-g168b From d009cd0b5c4ba018681942daa59452fe2b1a42c2 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 21 Oct 2014 20:33:12 -0500 Subject: Fix error when viewing mirror with no active URLs Signed-off-by: Dan McGee --- mirrors/utils.py | 1 + mirrors/views.py | 2 ++ 2 files changed, 3 insertions(+) (limited to 'mirrors') diff --git a/mirrors/utils.py b/mirrors/utils.py index 51daf50e..930adb8d 100644 --- a/mirrors/utils.py +++ b/mirrors/utils.py @@ -141,6 +141,7 @@ def get_mirror_statuses(cutoff=DEFAULT_CUTOFF, mirror_id=None, show_all=False): else: check_frequency = None else: + urls = [] last_check = None num_checks = 0 check_frequency = None diff --git a/mirrors/views.py b/mirrors/views.py index c2736da8..cffafbf5 100644 --- a/mirrors/views.py +++ b/mirrors/views.py @@ -193,6 +193,8 @@ def mirror_details(request, name): def mirror_details_json(request, name): authorized = request.user.is_authenticated() mirror = get_object_or_404(Mirror, name=name) + if not authorized and (not mirror.public or not mirror.active): + raise Http404 status_info = get_mirror_statuses(mirror_id=mirror.id, show_all=authorized) data = status_info.copy() -- cgit v1.2.3-2-g168b