From 8d21e9f4b1d5a31880f9973d758de9becc90eb39 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sat, 28 Apr 2012 18:25:37 -0500 Subject: mirrorresolv: only run update query if values changed 98% of the time, we won't need to update the existing values as it will be the same as the prior run of this command. Do a quick check of the old and new values and don't send anything to the database if there is no need for an update. Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorresolv.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'mirrors/management') diff --git a/mirrors/management/commands/mirrorresolv.py b/mirrors/management/commands/mirrorresolv.py index 4e812f2d..0370f8ed 100644 --- a/mirrors/management/commands/mirrorresolv.py +++ b/mirrors/management/commands/mirrorresolv.py @@ -41,13 +41,19 @@ def resolve_mirrors(): logger.debug("requesting list of mirror URLs") for mirrorurl in MirrorUrl.objects.filter(mirror__active=True): try: + # save old values, we can skip no-op updates this way + oldvals = (mirrorurl.has_ipv4, mirrorurl.has_ipv6) logger.debug("resolving %3i (%s)", mirrorurl.id, mirrorurl.hostname) families = mirrorurl.address_families() mirrorurl.has_ipv4 = socket.AF_INET in families mirrorurl.has_ipv6 = socket.AF_INET6 in families logger.debug("%s: v4: %s v6: %s", mirrorurl.hostname, mirrorurl.has_ipv4, mirrorurl.has_ipv6) - mirrorurl.save(force_update=True) + # now check new values, only update if new != old + newvals = (mirrorurl.has_ipv4, mirrorurl.has_ipv6) + if newvals != oldvals: + logger.debug("values changed for %s", mirrorurl) + mirrorurl.save(force_update=True) except socket.error, e: logger.warn("error resolving %s: %s", mirrorurl.hostname, e) -- cgit v1.2.3-2-g168b From 44eb2d5ee0fa9e1b495027cec3e663ff85c0ed1d Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 29 Apr 2012 21:26:23 -0500 Subject: Use a custom User-Agent when checking mirror URLs Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorcheck.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'mirrors/management') diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py index 7ffb7773..c1269226 100644 --- a/mirrors/management/commands/mirrorcheck.py +++ b/mirrors/management/commands/mirrorcheck.py @@ -35,6 +35,7 @@ logging.basicConfig( stream=sys.stderr) logger = logging.getLogger() + class Command(NoArgsCommand): help = "Runs a check on all known mirror URLs to determine their up-to-date status." 
@@ -49,13 +50,16 @@ class Command(NoArgsCommand): return check_current_mirrors() + def check_mirror_url(mirror_url): url = mirror_url.url + 'lastsync' logger.info("checking URL %s", url) log = MirrorLog(url=mirror_url, check_time=utc_now()) + headers = {'User-Agent': 'archweb/1.0'} + req = urllib2.Request(url, None, headers) try: start = time.time() - result = urllib2.urlopen(url, timeout=10) + result = urllib2.urlopen(req, timeout=10) data = result.read() result.close() end = time.time() @@ -104,6 +108,7 @@ def check_mirror_url(mirror_url): return log + def mirror_url_worker(work, output): while True: try: @@ -116,11 +121,12 @@ def mirror_url_worker(work, output): except Empty: return 0 + class MirrorCheckPool(object): - def __init__(self, work, num_threads=10): + def __init__(self, urls, num_threads=10): self.tasks = Queue() self.logs = deque() - for i in list(work): + for i in list(urls): self.tasks.put(i) self.threads = [] for i in range(num_threads): @@ -140,6 +146,7 @@ class MirrorCheckPool(object): MirrorLog.objects.bulk_create(self.logs) logger.debug("log entries saved") + def check_current_mirrors(): urls = MirrorUrl.objects.filter( protocol__is_download=True, @@ -149,8 +156,4 @@ def check_current_mirrors(): pool.run() return 0 -# For lack of a better place to put it, here is a query to get latest check -# result joined with mirror details: -# SELECT mu.*, m.*, ml.* FROM mirrors_mirrorurl mu JOIN mirrors_mirror m ON mu.mirror_id = m.id JOIN mirrors_mirrorlog ml ON mu.id = ml.url_id LEFT JOIN mirrors_mirrorlog ml2 ON ml.url_id = ml2.url_id AND ml.id < ml2.id WHERE ml2.id IS NULL AND m.active = 1 AND m.public = 1; - # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From 2f7d770b261b3428bcff366ba6ff4fa631dd980a Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 13 May 2012 20:15:00 -0500 Subject: Add rsync support to mirrorcheck and other small improvements The main changes in this patch implement rsync:// protocol checking support by calling the rsync binary, requested in FS#29878. We track and log much of the same things as we already do for FTP and HTTP URLs- check time, last sync, total check duration, etc. Also added in this patch is a configurable timeout value which defaults to the previous hardcoded value of 10 seconds; this can be passed as an option to the mirrorcheck command. Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorcheck.py | 130 ++++++++++++++++++++--------- 1 file changed, 89 insertions(+), 41 deletions(-) (limited to 'mirrors/management') diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py index c1269226..ae89d5e0 100644 --- a/mirrors/management/commands/mirrorcheck.py +++ b/mirrors/management/commands/mirrorcheck.py @@ -9,22 +9,26 @@ we encounter errors, record those as well. 
Usage: ./manage.py mirrorcheck """ -from django.core.management.base import NoArgsCommand -from django.db import transaction - from collections import deque from datetime import datetime import logging +import os +from optparse import make_option +from pytz import utc import re import socket +import subprocess import sys import time +import tempfile from threading import Thread import types -from pytz import utc from Queue import Queue, Empty import urllib2 +from django.core.management.base import NoArgsCommand +from django.db import transaction + from main.utils import utc_now from mirrors.models import MirrorUrl, MirrorLog @@ -37,10 +41,15 @@ logger = logging.getLogger() class Command(NoArgsCommand): + option_list = NoArgsCommand.option_list + ( + make_option('-t', '--timeout', dest='timeout', default='10', + help='Timeout value for connecting to URL'), + ) help = "Runs a check on all known mirror URLs to determine their up-to-date status." def handle_noargs(self, **options): v = int(options.get('verbosity', 0)) + timeout = int(options.get('timeout', 10)) if v == 0: logger.level = logging.ERROR elif v == 1: @@ -48,10 +57,29 @@ class Command(NoArgsCommand): elif v == 2: logger.level = logging.DEBUG - return check_current_mirrors() + urls = MirrorUrl.objects.select_related('protocol').filter( + mirror__active=True, mirror__public=True) + + pool = MirrorCheckPool(urls, timeout) + pool.run() + return 0 -def check_mirror_url(mirror_url): +def parse_lastsync(log, data): + '''lastsync file should be an epoch value created by us.''' + try: + parsed_time = datetime.utcfromtimestamp(int(data)) + log.last_sync = parsed_time.replace(tzinfo=utc) + except ValueError: + # it is bad news to try logging the lastsync value; + # sometimes we get a crazy-encoded web page. + # if we couldn't parse a time, this is a failure. + log.last_sync = None + log.error = "Could not parse time from lastsync" + log.is_success = False + + +def check_mirror_url(mirror_url, timeout): url = mirror_url.url + 'lastsync' logger.info("checking URL %s", url) log = MirrorLog(url=mirror_url, check_time=utc_now()) @@ -59,28 +87,14 @@ def check_mirror_url(mirror_url): req = urllib2.Request(url, None, headers) try: start = time.time() - result = urllib2.urlopen(req, timeout=10) + result = urllib2.urlopen(req, timeout=timeout) data = result.read() result.close() end = time.time() - # lastsync should be an epoch value created by us - parsed_time = None - try: - parsed_time = datetime.utcfromtimestamp(int(data)) - parsed_time = parsed_time.replace(tzinfo=utc) - except ValueError: - # it is bad news to try logging the lastsync value; - # sometimes we get a crazy-encoded web page. 
- pass - - log.last_sync = parsed_time - # if we couldn't parse a time, this is a failure - if parsed_time is None: - log.error = "Could not parse time from lastsync" - log.is_success = False + parse_lastsync(log, data) log.duration = end - start logger.debug("success: %s, %.2f", url, log.duration) - except urllib2.HTTPError, e: + except urllib2.HTTPError as e: if e.code == 404: # we have a duration, just not a success end = time.time() @@ -88,7 +102,7 @@ def check_mirror_url(mirror_url): log.is_success = False log.error = str(e) logger.debug("failed: %s, %s", url, log.error) - except urllib2.URLError, e: + except urllib2.URLError as e: log.is_success = False log.error = e.reason if isinstance(e.reason, types.StringTypes) and \ @@ -101,20 +115,64 @@ def check_mirror_url(mirror_url): elif isinstance(e.reason, socket.error): log.error = e.reason.args[1] logger.debug("failed: %s, %s", url, log.error) - except socket.timeout, e: + except socket.timeout as e: log.is_success = False log.error = "Connection timed out." logger.debug("failed: %s, %s", url, log.error) + except socket.error as e: + log.is_success = False + log.error = str(e) + logger.debug("failed: %s, %s", url, log.error) + + return log + + +def check_rsync_url(mirror_url, timeout): + url = mirror_url.url + 'lastsync' + logger.info("checking URL %s", url) + log = MirrorLog(url=mirror_url, check_time=utc_now()) + + tempdir = tempfile.mkdtemp() + lastsync_path = os.path.join(tempdir, 'lastsync') + rsync_cmd = ["rsync", "--quiet", "--contimeout=%d" % timeout, + "--timeout=%d" % timeout, url, lastsync_path] + try: + with open(os.devnull, 'w') as devnull: + proc = subprocess.Popen(rsync_cmd, stdout=devnull, + stderr=subprocess.PIPE) + start = time.time() + _, errdata = proc.communicate() + end = time.time() + log.duration = end - start + if proc.returncode != 0: + logger.debug("error: %s, %s", url, errdata) + log.is_success = False + log.error = errdata.strip() + # look at rsync error code- if we had a command error or timed out, + # don't record a duration as it is misleading + if proc.returncode in (1, 30, 35): + log.duration = None + else: + logger.debug("success: %s, %.2f", url, log.duration) + with open(lastsync_path, 'r') as lastsync: + parse_lastsync(log, lastsync.read()) + finally: + if os.path.exists(lastsync_path): + os.unlink(lastsync_path) + os.rmdir(tempdir) return log -def mirror_url_worker(work, output): +def mirror_url_worker(work, output, timeout): while True: try: - item = work.get(block=False) + url = work.get(block=False) try: - log = check_mirror_url(item) + if url.protocol.protocol == 'rsync': + log = check_rsync_url(url, timeout) + else: + log = check_mirror_url(url, timeout) output.append(log) finally: work.task_done() @@ -123,7 +181,7 @@ def mirror_url_worker(work, output): class MirrorCheckPool(object): - def __init__(self, urls, num_threads=10): + def __init__(self, urls, timeout=10, num_threads=10): self.tasks = Queue() self.logs = deque() for i in list(urls): @@ -131,7 +189,7 @@ class MirrorCheckPool(object): self.threads = [] for i in range(num_threads): thread = Thread(target=mirror_url_worker, - args=(self.tasks, self.logs)) + args=(self.tasks, self.logs, timeout)) thread.daemon = True self.threads.append(thread) @@ -142,18 +200,8 @@ class MirrorCheckPool(object): thread.start() logger.debug("joining on all threads") self.tasks.join() - logger.debug("processing log entries") + logger.debug("processing %d log entries", len(self.logs)) MirrorLog.objects.bulk_create(self.logs) logger.debug("log entries 
saved") - -def check_current_mirrors(): - urls = MirrorUrl.objects.filter( - protocol__is_download=True, - mirror__active=True, mirror__public=True) - - pool = MirrorCheckPool(urls) - pool.run() - return 0 - # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From a87da032cb6b5b84624e4205b5f8b7cab37249cd Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 8 Jul 2012 20:36:51 -0500 Subject: Handle HTTPException being thrown in mirrorcheck Managed to see this bubble up today when running the mirrorcheck command on a less than ideal connection that was experiencing timeouts at the wrong time. Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorcheck.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'mirrors/management') diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py index ae89d5e0..3d431796 100644 --- a/mirrors/management/commands/mirrorcheck.py +++ b/mirrors/management/commands/mirrorcheck.py @@ -11,6 +11,7 @@ Usage: ./manage.py mirrorcheck from collections import deque from datetime import datetime +from httplib import HTTPException import logging import os from optparse import make_option @@ -115,6 +116,11 @@ def check_mirror_url(mirror_url, timeout): elif isinstance(e.reason, socket.error): log.error = e.reason.args[1] logger.debug("failed: %s, %s", url, log.error) + except HTTPException as e: + # e.g., BadStatusLine + log.is_success = False + log.error = "Exception in processing HTTP request." + logger.debug("failed: %s, %s", url, log.error) except socket.timeout as e: log.is_success = False log.error = "Connection timed out." -- cgit v1.2.3-2-g168b From 3c4ceb16331b37fd334dc9682d4cde6430838942 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 8 Jul 2012 20:56:28 -0500 Subject: mirrorcheck: Don't use bulk_create on sqlite3 It isn't worth it, as we run into the 999 max SQL statement variables issue when using it on any significant amount of mirrors. Since this is just a development database setup, and it isn't a command we need to run especially fast, we can ditch it. Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorcheck.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'mirrors/management') diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py index 3d431796..7a133cbf 100644 --- a/mirrors/management/commands/mirrorcheck.py +++ b/mirrors/management/commands/mirrorcheck.py @@ -30,7 +30,7 @@ import urllib2 from django.core.management.base import NoArgsCommand from django.db import transaction -from main.utils import utc_now +from main.utils import utc_now, database_vendor from mirrors.models import MirrorUrl, MirrorLog logging.basicConfig( @@ -207,7 +207,11 @@ class MirrorCheckPool(object): logger.debug("joining on all threads") self.tasks.join() logger.debug("processing %d log entries", len(self.logs)) - MirrorLog.objects.bulk_create(self.logs) + if database_vendor(MirrorLog, mode='write') == 'sqlite': + for log in self.logs: + log.save(force_insert=True) + else: + MirrorLog.objects.bulk_create(self.logs) logger.debug("log entries saved") # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From c0bf9e20660cfae7ea8994472555bba23398b598 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 24 Jul 2012 09:19:48 -0500 Subject: Remove custom utc_now() function, use django.utils.timezone.now() This was around from the time when we handled timezones sanely and Django did not; now that we are on 1.4 we no longer need our own code to handle this. 
Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorcheck.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'mirrors/management') diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py index 7a133cbf..e09ea680 100644 --- a/mirrors/management/commands/mirrorcheck.py +++ b/mirrors/management/commands/mirrorcheck.py @@ -29,8 +29,9 @@ import urllib2 from django.core.management.base import NoArgsCommand from django.db import transaction +from django.utils.timezone import now -from main.utils import utc_now, database_vendor +from main.utils import database_vendor from mirrors.models import MirrorUrl, MirrorLog logging.basicConfig( @@ -83,7 +84,7 @@ def parse_lastsync(log, data): def check_mirror_url(mirror_url, timeout): url = mirror_url.url + 'lastsync' logger.info("checking URL %s", url) - log = MirrorLog(url=mirror_url, check_time=utc_now()) + log = MirrorLog(url=mirror_url, check_time=now()) headers = {'User-Agent': 'archweb/1.0'} req = urllib2.Request(url, None, headers) try: @@ -136,7 +137,7 @@ def check_mirror_url(mirror_url, timeout): def check_rsync_url(mirror_url, timeout): url = mirror_url.url + 'lastsync' logger.info("checking URL %s", url) - log = MirrorLog(url=mirror_url, check_time=utc_now()) + log = MirrorLog(url=mirror_url, check_time=now()) tempdir = tempfile.mkdtemp() lastsync_path = os.path.join(tempdir, 'lastsync') -- cgit v1.2.3-2-g168b From 5566d43a7734f6bb2f48d5d511351da12ddc5cc1 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sat, 9 Feb 2013 16:43:40 -0600 Subject: Use 'update_fields' model.save() kwarg This was added in Django 1.5 and allows saving only a subset of a model's fields. It makes sense in a few cases to utilize it. Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorresolv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mirrors/management') diff --git a/mirrors/management/commands/mirrorresolv.py b/mirrors/management/commands/mirrorresolv.py index 0370f8ed..a6c2523e 100644 --- a/mirrors/management/commands/mirrorresolv.py +++ b/mirrors/management/commands/mirrorresolv.py @@ -53,7 +53,7 @@ def resolve_mirrors(): newvals = (mirrorurl.has_ipv4, mirrorurl.has_ipv6) if newvals != oldvals: logger.debug("values changed for %s", mirrorurl) - mirrorurl.save(force_update=True) + mirrorurl.save(update_fields=('has_ipv4', 'has_ipv6')) except socket.error, e: logger.warn("error resolving %s: %s", mirrorurl.hostname, e) -- cgit v1.2.3-2-g168b From 3e0209f5e8ee197034b6c1f705af515d8154801b Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 6 Mar 2013 19:56:48 -0600 Subject: Revert "mirrorcheck: Don't use bulk_create on sqlite3" This reverts commit 3c4ceb16. We don't need this anymore as bulk_create gets automatic batching now on sqlite3 so it is safe to use. 
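For context, the ceiling being dodged here is SQLite's limit on bound parameters per statement (999 by default), which a single multi-row INSERT from bulk_create can easily exceed. Since Django batches bulk_create automatically against that backend limit, the workaround is unnecessary; the same effect can also be forced explicitly, as in this rough sketch, where the batch_size value and the logs variable are illustrative only:

    # Each row binds one parameter per column, so keep
    # columns * batch_size safely under SQLite's 999-variable cap.
    MirrorLog.objects.bulk_create(logs, batch_size=200)
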
Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorcheck.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'mirrors/management') diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py index e09ea680..2116ab29 100644 --- a/mirrors/management/commands/mirrorcheck.py +++ b/mirrors/management/commands/mirrorcheck.py @@ -31,7 +31,6 @@ from django.core.management.base import NoArgsCommand from django.db import transaction from django.utils.timezone import now -from main.utils import database_vendor from mirrors.models import MirrorUrl, MirrorLog logging.basicConfig( @@ -208,11 +207,7 @@ class MirrorCheckPool(object): logger.debug("joining on all threads") self.tasks.join() logger.debug("processing %d log entries", len(self.logs)) - if database_vendor(MirrorLog, mode='write') == 'sqlite': - for log in self.logs: - log.save(force_insert=True) - else: - MirrorLog.objects.bulk_create(self.logs) + MirrorLog.objects.bulk_create(self.logs) logger.debug("log entries saved") # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From 7c8b09b95ce5db9ddf7e895c2722bd202f5c4f54 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 6 Mar 2013 20:58:09 -0600 Subject: Teach mirrorcheck management command about check locations This adds the -l/--location argument to the command in order to pass in a check location that we are currently running from. This locks the IP address family to the one derived from the address on that location, and stores any check results tagged with a location ID. Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorcheck.py | 63 ++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 17 deletions(-) (limited to 'mirrors/management') diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py index 2116ab29..f133c785 100644 --- a/mirrors/management/commands/mirrorcheck.py +++ b/mirrors/management/commands/mirrorcheck.py @@ -31,7 +31,8 @@ from django.core.management.base import NoArgsCommand from django.db import transaction from django.utils.timezone import now -from mirrors.models import MirrorUrl, MirrorLog +from mirrors.models import MirrorUrl, MirrorLog, CheckLocation + logging.basicConfig( level=logging.WARNING, @@ -40,17 +41,17 @@ logging.basicConfig( stream=sys.stderr) logger = logging.getLogger() - class Command(NoArgsCommand): option_list = NoArgsCommand.option_list + ( - make_option('-t', '--timeout', dest='timeout', default='10', + make_option('-t', '--timeout', dest='timeout', type='float', default=10.0, help='Timeout value for connecting to URL'), + make_option('-l', '--location', dest='location', type='int', + help='ID of CheckLocation object to use for this run'), ) help = "Runs a check on all known mirror URLs to determine their up-to-date status." 
def handle_noargs(self, **options): v = int(options.get('verbosity', 0)) - timeout = int(options.get('timeout', 10)) if v == 0: logger.level = logging.ERROR elif v == 1: @@ -58,14 +59,35 @@ class Command(NoArgsCommand): elif v == 2: logger.level = logging.DEBUG + timeout = options.get('timeout') + urls = MirrorUrl.objects.select_related('protocol').filter( mirror__active=True, mirror__public=True) - pool = MirrorCheckPool(urls, timeout) + location = options.get('location', None) + if location: + location = CheckLocation.objects.get(id=location) + family = location.family + monkeypatch_getaddrinfo(family) + if family == socket.AF_INET6: + urls = urls.filter(has_ipv6=True) + elif family == socket.AF_INET: + urls = urls.filter(has_ipv4=True) + + pool = MirrorCheckPool(urls, location, timeout) pool.run() return 0 +def monkeypatch_getaddrinfo(force_family=socket.AF_INET): + '''Force the Python socket module to connect over the designated family; + e.g. socket.AF_INET or socket.AF_INET6.''' + orig = socket.getaddrinfo + def wrapper(host, port, family=0, socktype=0, proto=0, flags=0): + return orig(host, port, force_family, socktype, proto, flags) + socket.getaddrinfo = wrapper + + def parse_lastsync(log, data): '''lastsync file should be an epoch value created by us.''' try: @@ -80,10 +102,10 @@ def parse_lastsync(log, data): log.is_success = False -def check_mirror_url(mirror_url, timeout): +def check_mirror_url(mirror_url, location, timeout): url = mirror_url.url + 'lastsync' logger.info("checking URL %s", url) - log = MirrorLog(url=mirror_url, check_time=now()) + log = MirrorLog(url=mirror_url, check_time=now(), location=location) headers = {'User-Agent': 'archweb/1.0'} req = urllib2.Request(url, None, headers) try: @@ -133,17 +155,24 @@ def check_mirror_url(mirror_url, timeout): return log -def check_rsync_url(mirror_url, timeout): +def check_rsync_url(mirror_url, location, timeout): url = mirror_url.url + 'lastsync' logger.info("checking URL %s", url) - log = MirrorLog(url=mirror_url, check_time=now()) + log = MirrorLog(url=mirror_url, check_time=now(), location=location) tempdir = tempfile.mkdtemp() + ipopt = '' + if location: + if location.family == socket.AF_INET6: + ipopt = '--ipv6' + elif location.family == socket.AF_INET: + ipopt = '--ipv4' lastsync_path = os.path.join(tempdir, 'lastsync') rsync_cmd = ["rsync", "--quiet", "--contimeout=%d" % timeout, - "--timeout=%d" % timeout, url, lastsync_path] + "--timeout=%d" % timeout, ipopt, url, lastsync_path] try: with open(os.devnull, 'w') as devnull: + logger.debug("rsync cmd: %s", ' '.join(rsync_cmd)) proc = subprocess.Popen(rsync_cmd, stdout=devnull, stderr=subprocess.PIPE) start = time.time() @@ -170,15 +199,15 @@ def check_rsync_url(mirror_url, timeout): return log -def mirror_url_worker(work, output, timeout): +def mirror_url_worker(work, output, location, timeout): while True: try: url = work.get(block=False) try: if url.protocol.protocol == 'rsync': - log = check_rsync_url(url, timeout) + log = check_rsync_url(url, location, timeout) else: - log = check_mirror_url(url, timeout) + log = check_mirror_url(url, location, timeout) output.append(log) finally: work.task_done() @@ -187,15 +216,15 @@ def mirror_url_worker(work, output, timeout): class MirrorCheckPool(object): - def __init__(self, urls, timeout=10, num_threads=10): + def __init__(self, urls, location, timeout=10, num_threads=10): self.tasks = Queue() self.logs = deque() - for i in list(urls): - self.tasks.put(i) + for url in list(urls): + self.tasks.put(url) self.threads = [] 
for i in range(num_threads): thread = Thread(target=mirror_url_worker, - args=(self.tasks, self.logs, timeout)) + args=(self.tasks, self.logs, location, timeout)) thread.daemon = True self.threads.append(thread) -- cgit v1.2.3-2-g168b From ace95f6e53f41409568d4e4f1cf4c2a69d931e2c Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 6 Mar 2013 21:38:58 -0600 Subject: Don't add blank options to rsync command line Rsync doesn't like this so much: Unexpected remote arg: rsync://mirror.example.com/archlinux/lastsync rsync error: syntax or usage error (code 1) at main.c(1214) [sender=3.0.9] Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorcheck.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'mirrors/management') diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py index f133c785..1315a013 100644 --- a/mirrors/management/commands/mirrorcheck.py +++ b/mirrors/management/commands/mirrorcheck.py @@ -169,7 +169,11 @@ def check_rsync_url(mirror_url, location, timeout): ipopt = '--ipv4' lastsync_path = os.path.join(tempdir, 'lastsync') rsync_cmd = ["rsync", "--quiet", "--contimeout=%d" % timeout, - "--timeout=%d" % timeout, ipopt, url, lastsync_path] + "--timeout=%d" % timeout] + if ipopt: + rsync_cmd.append(ipopt) + rsync_cmd.append(url) + rsync_cmd.append(lastsync_path) try: with open(os.devnull, 'w') as devnull: logger.debug("rsync cmd: %s", ' '.join(rsync_cmd)) -- cgit v1.2.3-2-g168b From 46d21e03e81e4cacc849d798052b3ffd525d638a Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 12 Mar 2013 20:18:51 -0500 Subject: Don't check FTP + IPv6 combination Very few, if any, FTP servers support connections over IPv6. Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorcheck.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'mirrors/management') diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py index 1315a013..93b53d6b 100644 --- a/mirrors/management/commands/mirrorcheck.py +++ b/mirrors/management/commands/mirrorcheck.py @@ -41,6 +41,7 @@ logging.basicConfig( stream=sys.stderr) logger = logging.getLogger() + class Command(NoArgsCommand): option_list = NoArgsCommand.option_list + ( make_option('-t', '--timeout', dest='timeout', type='float', default=10.0, @@ -83,8 +84,10 @@ def monkeypatch_getaddrinfo(force_family=socket.AF_INET): '''Force the Python socket module to connect over the designated family; e.g. 
socket.AF_INET or socket.AF_INET6.''' orig = socket.getaddrinfo + def wrapper(host, port, family=0, socktype=0, proto=0, flags=0): return orig(host, port, force_family, socktype, proto, flags) + socket.getaddrinfo = wrapper @@ -103,6 +106,12 @@ def parse_lastsync(log, data): def check_mirror_url(mirror_url, location, timeout): + if location: + if location.family == socket.AF_INET6: + ipopt = '--ipv6' + elif location.family == socket.AF_INET: + ipopt = '--ipv4' + url = mirror_url.url + 'lastsync' logger.info("checking URL %s", url) log = MirrorLog(url=mirror_url, check_time=now(), location=location) @@ -210,9 +219,14 @@ def mirror_url_worker(work, output, location, timeout): try: if url.protocol.protocol == 'rsync': log = check_rsync_url(url, location, timeout) + if (url.protocol.protocol == 'ftp' and location and + location.family == socket.AF_INET6): + # IPv6 + FTP don't work; skip checking completely + log = None else: log = check_mirror_url(url, location, timeout) - output.append(log) + if log: + output.append(log) finally: work.task_done() except Empty: -- cgit v1.2.3-2-g168b From b8ee7b1ee281b45b245fb454228b8ad847c56200 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 13 Mar 2013 13:36:14 -0500 Subject: mirrorcheck: s/if/elif/ when determining what check function to run This was a silly thinko here; it caused the logs to fill up with a bunch of 'unknown url type: rsync' errors. Signed-off-by: Dan McGee --- mirrors/management/commands/mirrorcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mirrors/management') diff --git a/mirrors/management/commands/mirrorcheck.py b/mirrors/management/commands/mirrorcheck.py index 93b53d6b..d6de8f22 100644 --- a/mirrors/management/commands/mirrorcheck.py +++ b/mirrors/management/commands/mirrorcheck.py @@ -219,7 +219,7 @@ def mirror_url_worker(work, output, location, timeout): try: if url.protocol.protocol == 'rsync': log = check_rsync_url(url, location, timeout) - if (url.protocol.protocol == 'ftp' and location and + elif (url.protocol.protocol == 'ftp' and location and location.family == socket.AF_INET6): # IPv6 + FTP don't work; skip checking completely log = None -- cgit v1.2.3-2-g168b
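Taken together, the last few patches leave the per-URL dispatch in mirror_url_worker looking roughly like the sketch below (reassembled from the hunks above rather than copied from the final file): rsync URLs are checked via the external rsync binary, FTP-over-IPv6 checks are skipped entirely, and everything else goes through the urllib2 path, with the check location and timeout threaded through to each worker.

    def mirror_url_worker(work, output, location, timeout):
        while True:
            try:
                url = work.get(block=False)
                try:
                    if url.protocol.protocol == 'rsync':
                        log = check_rsync_url(url, location, timeout)
                    elif (url.protocol.protocol == 'ftp' and location and
                            location.family == socket.AF_INET6):
                        # IPv6 + FTP don't work; skip checking completely
                        log = None
                    else:
                        log = check_mirror_url(url, location, timeout)
                    if log:
                        output.append(log)
                finally:
                    work.task_done()
            except Empty:
                return 0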