diff options
Diffstat (limited to 'devel')
-rw-r--r-- | devel/management/commands/reporead.py | 125 | ||||
-rw-r--r-- | devel/tests.py | 5 | ||||
-rw-r--r-- | devel/urls.py | 4 | ||||
-rw-r--r-- | devel/views.py | 18 |
4 files changed, 94 insertions, 58 deletions
diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index a8875c7e..baf7fee1 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -18,7 +18,8 @@ from django.contrib.auth.models import User from django.db import transaction from django.db.models import Q -import codecs +from collections import defaultdict +import io import os import re import sys @@ -27,14 +28,6 @@ import logging from datetime import datetime from optparse import make_option -# New in 2.6, but fast (C implementation) in 2.7. We will use it over codecs if -# available. Eventually remove the codecs import completely. -io = None -try: - import io -except ImportError: - pass - from main.models import Arch, Package, PackageDepend, PackageFile, Repo from packages.models import Conflict, Provision, Replacement @@ -74,18 +67,13 @@ class Command(BaseCommand): elif v == 2: logger.level = logging.DEBUG - import signal, traceback - handler = lambda sig, stack: traceback.print_stack(stack) - signal.signal(signal.SIGQUIT, handler) - signal.signal(signal.SIGUSR1, handler) - return read_repo(arch, filename, options) class Pkg(object): """An interim 'container' object for holding Arch package data.""" bare = ( 'name', 'base', 'arch', 'desc', 'filename', - 'md5sum', 'url', 'builddate', 'packager' ) + 'md5sum', 'url', 'packager' ) number = ( 'csize', 'isize' ) collections = ( 'depends', 'optdepends', 'conflicts', 'provides', 'replaces', 'groups', 'license', 'files' ) @@ -101,8 +89,7 @@ class Pkg(object): setattr(self, k, None) for k in self.collections: setattr(self, k, ()) - # So we can tell the diffence between a package with no files, and a DB - # without files entries + self.files = None self.has_files = False def populate(self, values): @@ -118,12 +105,22 @@ class Pkg(object): self.rel = match.group(4) if match.group(2): self.epoch = int(match.group(2)) + elif k == 'builddate': + try: + self.builddate = datetime.utcfromtimestamp(int(v[0])) + except ValueError: + try: + self.builddate = datetime.strptime(v[0], + '%a %b %d %H:%M:%S %Y') + except ValueError: + logger.warning('Package %s had unparsable build date %s', + self.name, v[0]) elif k == 'files': - self.files = v + self.files = tuple(v) self.has_files = True else: # anything left in collections - setattr(self, k, v) + setattr(self, k, tuple(v)) @property def full_version(self): @@ -235,6 +232,8 @@ def create_multivalued(dbpkg, repopkg, db_attr, repo_attr): collection.create(name=name) def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): + db_score = 1 + if repopkg.base: dbpkg.pkgbase = repopkg.base else: @@ -247,15 +246,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.filename = repopkg.filename dbpkg.compressed_size = repopkg.csize dbpkg.installed_size = repopkg.isize - try: - dbpkg.build_date = datetime.utcfromtimestamp(int(repopkg.builddate)) - except ValueError: - try: - dbpkg.build_date = datetime.strptime(repopkg.builddate, - '%a %b %d %H:%M:%S %Y') - except ValueError: - logger.warning('Package %s had unparsable build date %s', - repopkg.name, repopkg.builddate) + dbpkg.build_date = repopkg.builddate dbpkg.packager_str = repopkg.packager # attempt to find the corresponding django user for this string dbpkg.packager = find_user(repopkg.packager) @@ -265,7 +256,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.last_update = timestamp dbpkg.save() - populate_files(dbpkg, repopkg, force=force) + db_score += populate_files(dbpkg, repopkg, force=force) dbpkg.packagedepend_set.all().delete() for y in repopkg.depends: @@ -286,6 +277,15 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): create_multivalued(dbpkg, repopkg, 'groups', 'groups') create_multivalued(dbpkg, repopkg, 'licenses', 'license') + related_score = (len(repopkg.depends) + len(repopkg.optdepends) + + len(repopkg.conflicts) + len(repopkg.provides) + + len(repopkg.replaces) + len(repopkg.groups) + + len(repopkg.license)) + if related_score: + db_score += (related_score / 20) + 1 + + return db_score + def populate_files(dbpkg, repopkg, force=False): if not force: @@ -294,11 +294,11 @@ def populate_files(dbpkg, repopkg, force=False): logger.info("DB version (%s) didn't match repo version " "(%s) for package %s, skipping file list addition", dbpkg.full_version, repopkg.full_version, dbpkg.pkgname) - return + return 0 if not dbpkg.files_last_update or not dbpkg.last_update: pass elif dbpkg.files_last_update > dbpkg.last_update: - return + return 0 # only delete files if we are reading a DB that contains them if repopkg.has_files: dbpkg.packagefile_set.all().delete() @@ -317,6 +317,28 @@ def populate_files(dbpkg, repopkg, force=False): pkgfile.save(force_insert=True) dbpkg.files_last_update = datetime.utcnow() dbpkg.save() + return (len(repopkg.files) / 50) + 1 + return 0 + + +class Batcher(object): + def __init__(self, threshold, start=0): + self.threshold = threshold + self.meter = start + + def batch_commit(self, score): + """ + Track updates to the database and perform a commit if the batch + becomes sufficiently large. "Large" is defined by waiting for the + sum of scores to exceed the arbitrary threshold value; once it is + hit a commit is issued. + """ + self.meter += score + if self.meter > self.threshold: + logger.debug("Committing transaction, batch threshold hit") + transaction.commit() + self.meter = 0 + @transaction.commit_on_success def db_update(archname, reponame, pkgs, options): @@ -369,19 +391,23 @@ def db_update(archname, reponame, pkgs, options): elif dbpercent < 75.0: logger.warning(msg) + batcher = Batcher(100) + if not filesonly: # packages in syncdb and not in database (add to database) for p in [x for x in pkgs if x.name in in_sync_not_db]: logger.info("Adding package %s", p.name) - pkg = Package(pkgname = p.name, arch = architecture, repo = repository) - populate_pkg(pkg, p, timestamp=datetime.utcnow()) + pkg = Package(pkgname=p.name, arch=architecture, repo=repository) + score = populate_pkg(pkg, p, timestamp=datetime.utcnow()) + batcher.batch_commit(score) # packages in database and not in syncdb (remove from database) in_db_not_sync = dbset - syncset for p in in_db_not_sync: - logger.info("Removing package %s from database", p) + logger.info("Removing package %s", p) dbp = dbdict[p] dbp.delete() + batcher.batch_commit(1) # packages in both database and in syncdb (update in database) pkg_in_both = syncset & dbset @@ -399,12 +425,15 @@ def db_update(archname, reponame, pkgs, options): continue else: timestamp = datetime.utcnow() + if filesonly: - logger.debug("Checking files for package %s in database", p.name) - populate_files(dbp, p, force=force) + logger.debug("Checking files for package %s", p.name) + score = populate_files(dbp, p, force=force) else: - logger.info("Updating package %s in database", p.name) - populate_pkg(dbp, p, force=force, timestamp=timestamp) + logger.info("Updating package %s", p.name) + score = populate_pkg(dbp, p, force=force, timestamp=timestamp) + + batcher.batch_commit(score) logger.info('Finished updating Arch: %s', archname) @@ -454,23 +483,19 @@ def parse_repo(repopath): repodb = tarfile.open(repopath, "r") logger.debug("Starting package parsing") dbfiles = ('desc', 'depends', 'files') - pkgs = {} + newpkg = lambda: Pkg(reponame) + pkgs = defaultdict(newpkg) for tarinfo in repodb.getmembers(): if tarinfo.isreg(): pkgid, fname = os.path.split(tarinfo.name) if fname not in dbfiles: continue data_file = repodb.extractfile(tarinfo) - if io is None: - data_file = codecs.EncodedFile(data_file, 'utf-8') - else: - data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), - encoding='utf=8') + data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), + encoding='utf=8') try: - data = parse_info(data_file) - p = pkgs.setdefault(pkgid, Pkg(reponame)) - p.populate(data) - except UnicodeDecodeError, e: + pkgs[pkgid].populate(parse_info(data_file)) + except UnicodeDecodeError: logger.warn("Could not correctly decode %s, skipping file", tarinfo.name) data_file.close() @@ -503,7 +528,9 @@ def read_repo(primary_arch, repo_file, options): else: # we don't include mis-arched packages logger.warning("Package %s arch = %s", - package.name,package.arch) + package.name, package.arch) + del packages + logger.info('Starting database updates.') for arch in sorted(packages_arches.keys()): db_update(arch, repo, packages_arches[arch], options) diff --git a/devel/tests.py b/devel/tests.py index da5459d6..604da74c 100644 --- a/devel/tests.py +++ b/devel/tests.py @@ -26,7 +26,4 @@ class DevelTest(TestCase): def test_mirrors(self): response = self.client.get('/mirrors/') - self.assertEqual(response.status_code, 302) - self.assertEqual(response.has_header('Location'), True) - self.assertEqual(response['location'], - 'http://testserver/login/?next=/mirrors/') + self.assertEqual(response.status_code, 200) diff --git a/devel/urls.py b/devel/urls.py index 8759562e..bc3bcace 100644 --- a/devel/urls.py +++ b/devel/urls.py @@ -3,8 +3,8 @@ from django.conf.urls.defaults import patterns urlpatterns = patterns('devel.views', (r'^admin_log/$','admin_log'), (r'^admin_log/(?P<username>.*)/$','admin_log'), - (r'^clock/$', 'clock'), - (r'^$', 'index'), + (r'^clock/$', 'clock', {}, 'devel-clocks'), + (r'^$', 'index', {}, 'devel-index'), (r'^newuser/$', 'new_user_form'), (r'^profile/$', 'change_profile'), (r'^reports/(?P<report>.*)/(?P<username>.*)/$', 'report'), diff --git a/devel/views.py b/devel/views.py index 89963d27..3bf68a58 100644 --- a/devel/views.py +++ b/devel/views.py @@ -32,7 +32,7 @@ from string import ascii_letters, digits def index(request): '''the developer dashboard''' inner_q = PackageRelation.objects.filter(user=request.user).values('pkgbase') - flagged = Package.objects.select_related('arch', 'repo').filter( + flagged = Package.objects.normal().filter( flag_date__isnull=False, pkgbase__in=inner_q).order_by('pkgname') todopkgs = TodolistPkg.objects.select_related( @@ -102,6 +102,13 @@ class ProfileForm(forms.Form): return self.cleaned_data class UserProfileForm(forms.ModelForm): + def clean_pgp_key(self): + data = self.cleaned_data['pgp_key'] + # strip 0x prefix if provided; store uppercase + if data.startswith('0x'): + data = data[2:] + return data.upper() + class Meta: model = UserProfile exclude = ['allowed_repos', 'user'] @@ -128,14 +135,19 @@ def change_profile(request): @login_required def report(request, report, username=None): title = 'Developer Report' - packages = Package.objects.select_related('arch', 'repo') + packages = Package.objects.normal() names = attrs = user = None if report == 'old': title = 'Packages last built more than two years ago' - cutoff = datetime.now() - timedelta(days=730) + cutoff = datetime.now() - timedelta(days=365 * 2) packages = packages.filter( build_date__lt=cutoff).order_by('build_date') + elif report == 'long-out-of-date': + title = 'Packages marked out-of-date more than 90 days ago' + cutoff = datetime.now() - timedelta(days=90) + packages = packages.filter( + flag_date__lt=cutoff).order_by('flag_date') elif report == 'big': title = 'Packages with compressed size > 50 MiB' cutoff = 50 * 1024 * 1024 |