From 1840416b9e8892a685202f30b4079fd04607151f Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 2 Jun 2011 16:21:08 -0500 Subject: Add a PGP key field on the dev profile Signed-off-by: Dan McGee --- devel/views.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'devel') diff --git a/devel/views.py b/devel/views.py index 475376ad..25ad2ccf 100644 --- a/devel/views.py +++ b/devel/views.py @@ -102,6 +102,13 @@ class ProfileForm(forms.Form): return self.cleaned_data class UserProfileForm(forms.ModelForm): + def clean_pgp_key(self): + data = self.cleaned_data['pgp_key'] + # strip 0x prefix if provided; store uppercase + if data.startswith('0x'): + data = data[2:] + return data.upper() + class Meta: model = UserProfile exclude = ['allowed_repos', 'user'] -- cgit v1.2.3-2-g168b From 5fe626c6cc8444603cf7ae5199271b69d38ff255 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 9 Jun 2011 16:17:42 -0500 Subject: Management command cleanup Now that we aren't seeing odd segfaults and hung tasks, we can remove the traceback stuff from the scripts. Also use the 'io' module only, it has been long enough. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index a8875c7e..1adc359e 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -18,7 +18,7 @@ from django.contrib.auth.models import User from django.db import transaction from django.db.models import Q -import codecs +import io import os import re import sys @@ -27,14 +27,6 @@ import logging from datetime import datetime from optparse import make_option -# New in 2.6, but fast (C implementation) in 2.7. We will use it over codecs if -# available. Eventually remove the codecs import completely. -io = None -try: - import io -except ImportError: - pass - from main.models import Arch, Package, PackageDepend, PackageFile, Repo from packages.models import Conflict, Provision, Replacement @@ -74,11 +66,6 @@ class Command(BaseCommand): elif v == 2: logger.level = logging.DEBUG - import signal, traceback - handler = lambda sig, stack: traceback.print_stack(stack) - signal.signal(signal.SIGQUIT, handler) - signal.signal(signal.SIGUSR1, handler) - return read_repo(arch, filename, options) @@ -101,8 +88,7 @@ class Pkg(object): setattr(self, k, None) for k in self.collections: setattr(self, k, ()) - # So we can tell the diffence between a package with no files, and a DB - # without files entries + self.files = None self.has_files = False def populate(self, values): @@ -461,16 +447,13 @@ def parse_repo(repopath): if fname not in dbfiles: continue data_file = repodb.extractfile(tarinfo) - if io is None: - data_file = codecs.EncodedFile(data_file, 'utf-8') - else: - data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), - encoding='utf=8') + data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), + encoding='utf=8') try: data = parse_info(data_file) p = pkgs.setdefault(pkgid, Pkg(reponame)) p.populate(data) - except UnicodeDecodeError, e: + except UnicodeDecodeError: logger.warn("Could not correctly decode %s, skipping file", tarinfo.name) data_file.close() -- cgit v1.2.3-2-g168b From 895f8a20d35a18f3a0cc6e1530eb40292270fc7c Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 9 Jun 2011 17:25:32 -0500 Subject: reporead: allow batching of package updates The real reason I originally added transactions to this code was to prevent half-updates; e.g. a package gets in without the matching depends values. We can safely commit between packages and resume processing the database at a later time. Take advantage of this fact and commit every so often in batch fashion if we have a lot of updates piling up. In the case of updating the files DB, this can really cut down on the need to hold open a long-running, statement heavy transaction and get the information public faster. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 58 +++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 9 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 1adc359e..e9878c93 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -221,6 +221,8 @@ def create_multivalued(dbpkg, repopkg, db_attr, repo_attr): collection.create(name=name) def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): + db_score = 1 + if repopkg.base: dbpkg.pkgbase = repopkg.base else: @@ -251,7 +253,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.last_update = timestamp dbpkg.save() - populate_files(dbpkg, repopkg, force=force) + db_score += populate_files(dbpkg, repopkg, force=force) dbpkg.packagedepend_set.all().delete() for y in repopkg.depends: @@ -272,6 +274,15 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): create_multivalued(dbpkg, repopkg, 'groups', 'groups') create_multivalued(dbpkg, repopkg, 'licenses', 'license') + related_score = (len(repopkg.depends) + len(repopkg.optdepends) + + len(repopkg.conflicts) + len(repopkg.provides) + + len(repopkg.replaces) + len(repopkg.groups) + + len(repopkg.license)) + if related_score: + db_score += (related_score / 20) + 1 + + return db_score + def populate_files(dbpkg, repopkg, force=False): if not force: @@ -280,11 +291,11 @@ def populate_files(dbpkg, repopkg, force=False): logger.info("DB version (%s) didn't match repo version " "(%s) for package %s, skipping file list addition", dbpkg.full_version, repopkg.full_version, dbpkg.pkgname) - return + return 0 if not dbpkg.files_last_update or not dbpkg.last_update: pass elif dbpkg.files_last_update > dbpkg.last_update: - return + return 0 # only delete files if we are reading a DB that contains them if repopkg.has_files: dbpkg.packagefile_set.all().delete() @@ -303,6 +314,28 @@ def populate_files(dbpkg, repopkg, force=False): pkgfile.save(force_insert=True) dbpkg.files_last_update = datetime.utcnow() dbpkg.save() + return (len(repopkg.files) / 50) + 1 + return 0 + + +class Batcher(object): + def __init__(self, threshold, start=0): + self.threshold = threshold + self.meter = start + + def batch_commit(self, score): + """ + Track updates to the database and perform a commit if the batch + becomes sufficiently large. "Large" is defined by waiting for the + sum of scores to exceed the arbitrary threshold value; once it is + hit a commit is issued. + """ + self.meter += score + if self.meter > self.threshold: + logger.debug("Committing transaction, batch threshold hit") + transaction.commit() + self.meter = 0 + @transaction.commit_on_success def db_update(archname, reponame, pkgs, options): @@ -355,19 +388,23 @@ def db_update(archname, reponame, pkgs, options): elif dbpercent < 75.0: logger.warning(msg) + batcher = Batcher(100) + if not filesonly: # packages in syncdb and not in database (add to database) for p in [x for x in pkgs if x.name in in_sync_not_db]: logger.info("Adding package %s", p.name) pkg = Package(pkgname = p.name, arch = architecture, repo = repository) - populate_pkg(pkg, p, timestamp=datetime.utcnow()) + score = populate_pkg(pkg, p, timestamp=datetime.utcnow()) + batcher.batch_commit(score) # packages in database and not in syncdb (remove from database) in_db_not_sync = dbset - syncset for p in in_db_not_sync: - logger.info("Removing package %s from database", p) + logger.info("Removing package %s", p) dbp = dbdict[p] dbp.delete() + batcher.batch_commit(score) # packages in both database and in syncdb (update in database) pkg_in_both = syncset & dbset @@ -385,12 +422,15 @@ def db_update(archname, reponame, pkgs, options): continue else: timestamp = datetime.utcnow() + if filesonly: - logger.debug("Checking files for package %s in database", p.name) - populate_files(dbp, p, force=force) + logger.debug("Checking files for package %s", p.name) + score = populate_files(dbp, p, force=force) else: - logger.info("Updating package %s in database", p.name) - populate_pkg(dbp, p, force=force, timestamp=timestamp) + logger.info("Updating package %s", p.name) + score = populate_pkg(dbp, p, force=force, timestamp=timestamp) + + batcher.batch_commit(score) logger.info('Finished updating Arch: %s', archname) -- cgit v1.2.3-2-g168b From 01b07b5b07cd152949c9f01fec91408945273583 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 9 Jun 2011 17:30:50 -0500 Subject: Fix busted batch score on package removal Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index e9878c93..0bd5587a 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -404,7 +404,7 @@ def db_update(archname, reponame, pkgs, options): logger.info("Removing package %s", p) dbp = dbdict[p] dbp.delete() - batcher.batch_commit(score) + batcher.batch_commit(1) # packages in both database and in syncdb (update in database) pkg_in_both = syncset & dbset -- cgit v1.2.3-2-g168b From 3284b302550dbd255762b8606343288d49402056 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Mon, 13 Jun 2011 13:18:28 -0500 Subject: Add a long out-of-date developer report This shows packages that have been marked out of date for more than 90 days in the repos. Signed-off-by: Dan McGee --- devel/views.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'devel') diff --git a/devel/views.py b/devel/views.py index 25ad2ccf..2b8bd43e 100644 --- a/devel/views.py +++ b/devel/views.py @@ -140,9 +140,14 @@ def report(request, report, username=None): if report == 'old': title = 'Packages last built more than two years ago' - cutoff = datetime.now() - timedelta(days=730) + cutoff = datetime.now() - timedelta(days=365 * 2) packages = packages.filter( build_date__lt=cutoff).order_by('build_date') + elif report == 'long-out-of-date': + title = 'Packages marked out-of-date more than 90 days ago' + cutoff = datetime.now() - timedelta(days=90) + packages = packages.filter( + flag_date__lt=cutoff).order_by('flag_date') elif report == 'big': title = 'Packages with compressed size > 50 MiB' cutoff = 50 * 1024 * 1024 -- cgit v1.2.3-2-g168b From 92dbad587ab77b84130b86153464647d583b677e Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 10 Jun 2011 10:43:57 -0500 Subject: reporead: two small cleanups * Parse builddate when reading from repo database file * Use defaultdict where it comes in handy Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 0bd5587a..4d30388e 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -18,6 +18,7 @@ from django.contrib.auth.models import User from django.db import transaction from django.db.models import Q +from collections import defaultdict import io import os import re @@ -72,7 +73,7 @@ class Command(BaseCommand): class Pkg(object): """An interim 'container' object for holding Arch package data.""" bare = ( 'name', 'base', 'arch', 'desc', 'filename', - 'md5sum', 'url', 'builddate', 'packager' ) + 'md5sum', 'url', 'packager' ) number = ( 'csize', 'isize' ) collections = ( 'depends', 'optdepends', 'conflicts', 'provides', 'replaces', 'groups', 'license', 'files' ) @@ -104,6 +105,16 @@ class Pkg(object): self.rel = match.group(4) if match.group(2): self.epoch = int(match.group(2)) + elif k == 'builddate': + try: + self.builddate = datetime.utcfromtimestamp(int(v[0])) + except ValueError: + try: + self.builddate = datetime.strptime(v[0], + '%a %b %d %H:%M:%S %Y') + except ValueError: + logger.warning('Package %s had unparsable build date %s', + self.name, v[0]) elif k == 'files': self.files = v self.has_files = True @@ -235,15 +246,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.filename = repopkg.filename dbpkg.compressed_size = repopkg.csize dbpkg.installed_size = repopkg.isize - try: - dbpkg.build_date = datetime.utcfromtimestamp(int(repopkg.builddate)) - except ValueError: - try: - dbpkg.build_date = datetime.strptime(repopkg.builddate, - '%a %b %d %H:%M:%S %Y') - except ValueError: - logger.warning('Package %s had unparsable build date %s', - repopkg.name, repopkg.builddate) + dbpkg.build_date = repopkg.builddate dbpkg.packager_str = repopkg.packager # attempt to find the corresponding django user for this string dbpkg.packager = find_user(repopkg.packager) @@ -394,7 +397,7 @@ def db_update(archname, reponame, pkgs, options): # packages in syncdb and not in database (add to database) for p in [x for x in pkgs if x.name in in_sync_not_db]: logger.info("Adding package %s", p.name) - pkg = Package(pkgname = p.name, arch = architecture, repo = repository) + pkg = Package(pkgname=p.name, arch=architecture, repo=repository) score = populate_pkg(pkg, p, timestamp=datetime.utcnow()) batcher.batch_commit(score) @@ -480,7 +483,8 @@ def parse_repo(repopath): repodb = tarfile.open(repopath, "r") logger.debug("Starting package parsing") dbfiles = ('desc', 'depends', 'files') - pkgs = {} + newpkg = lambda: Pkg(reponame) + pkgs = defaultdict(newpkg) for tarinfo in repodb.getmembers(): if tarinfo.isreg(): pkgid, fname = os.path.split(tarinfo.name) @@ -490,9 +494,7 @@ def parse_repo(repopath): data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), encoding='utf=8') try: - data = parse_info(data_file) - p = pkgs.setdefault(pkgid, Pkg(reponame)) - p.populate(data) + pkgs[pkgid].populate(parse_info(data_file)) except UnicodeDecodeError: logger.warn("Could not correctly decode %s, skipping file", tarinfo.name) -- cgit v1.2.3-2-g168b From b336dd15598132d1c501a9d44bc4d5a0e64bfb2e Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 10 Jun 2011 10:46:06 -0500 Subject: reporead: small memory/perf improvements Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 4d30388e..baf7fee1 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -116,11 +116,11 @@ class Pkg(object): logger.warning('Package %s had unparsable build date %s', self.name, v[0]) elif k == 'files': - self.files = v + self.files = tuple(v) self.has_files = True else: # anything left in collections - setattr(self, k, v) + setattr(self, k, tuple(v)) @property def full_version(self): @@ -528,7 +528,9 @@ def read_repo(primary_arch, repo_file, options): else: # we don't include mis-arched packages logger.warning("Package %s arch = %s", - package.name,package.arch) + package.name, package.arch) + del packages + logger.info('Starting database updates.') for arch in sorted(packages_arches.keys()): db_update(arch, repo, packages_arches[arch], options) -- cgit v1.2.3-2-g168b From f252aede11319c694f3a81626b996403eb14444f Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 14 Jun 2011 19:18:14 -0500 Subject: Add named devel URLs Signed-off-by: Dan McGee --- devel/urls.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'devel') diff --git a/devel/urls.py b/devel/urls.py index 8759562e..bc3bcace 100644 --- a/devel/urls.py +++ b/devel/urls.py @@ -3,8 +3,8 @@ from django.conf.urls.defaults import patterns urlpatterns = patterns('devel.views', (r'^admin_log/$','admin_log'), (r'^admin_log/(?P.*)/$','admin_log'), - (r'^clock/$', 'clock'), - (r'^$', 'index'), + (r'^clock/$', 'clock', {}, 'devel-clocks'), + (r'^$', 'index', {}, 'devel-index'), (r'^newuser/$', 'new_user_form'), (r'^profile/$', 'change_profile'), (r'^reports/(?P.*)/(?P.*)/$', 'report'), -- cgit v1.2.3-2-g168b From 4a9b6867a3a2786435316ab7deefa54257bb931d Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 15 Jun 2011 15:50:14 -0500 Subject: Refactor common select_related into manager method For a Package object query, we almost always did .select_related('arch', 'repo). Refactor this into the manager as a 'normal()' method so we can avoid sprinkling the same logic everywhere. Signed-off-by: Dan McGee --- devel/views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'devel') diff --git a/devel/views.py b/devel/views.py index 2b8bd43e..1827f2ac 100644 --- a/devel/views.py +++ b/devel/views.py @@ -32,7 +32,7 @@ from string import ascii_letters, digits def index(request): '''the developer dashboard''' inner_q = PackageRelation.objects.filter(user=request.user).values('pkgbase') - flagged = Package.objects.select_related('arch', 'repo').filter( + flagged = Package.objects.normal().filter( flag_date__isnull=False, pkgbase__in=inner_q).order_by('pkgname') todopkgs = TodolistPkg.objects.select_related( @@ -135,7 +135,7 @@ def change_profile(request): @login_required def report(request, report, username=None): title = 'Developer Report' - packages = Package.objects.select_related('arch', 'repo') + packages = Package.objects.normal() names = attrs = user = None if report == 'old': -- cgit v1.2.3-2-g168b From aaae7e599eeedbc7e1f23d2da1588ad4edcd0f5f Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 15 Jun 2011 15:53:41 -0500 Subject: Fix out of date test A version of this view is now publicly available, so it returns 200. Signed-off-by: Dan McGee --- devel/tests.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'devel') diff --git a/devel/tests.py b/devel/tests.py index da5459d6..604da74c 100644 --- a/devel/tests.py +++ b/devel/tests.py @@ -26,7 +26,4 @@ class DevelTest(TestCase): def test_mirrors(self): response = self.client.get('/mirrors/') - self.assertEqual(response.status_code, 302) - self.assertEqual(response.has_header('Location'), True) - self.assertEqual(response['location'], - 'http://testserver/login/?next=/mirrors/') + self.assertEqual(response.status_code, 200) -- cgit v1.2.3-2-g168b