From 1840416b9e8892a685202f30b4079fd04607151f Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 2 Jun 2011 16:21:08 -0500 Subject: Add a PGP key field on the dev profile Signed-off-by: Dan McGee --- devel/views.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'devel') diff --git a/devel/views.py b/devel/views.py index 475376ad..25ad2ccf 100644 --- a/devel/views.py +++ b/devel/views.py @@ -102,6 +102,13 @@ class ProfileForm(forms.Form): return self.cleaned_data class UserProfileForm(forms.ModelForm): + def clean_pgp_key(self): + data = self.cleaned_data['pgp_key'] + # strip 0x prefix if provided; store uppercase + if data.startswith('0x'): + data = data[2:] + return data.upper() + class Meta: model = UserProfile exclude = ['allowed_repos', 'user'] -- cgit v1.2.3-2-g168b From 5fe626c6cc8444603cf7ae5199271b69d38ff255 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 9 Jun 2011 16:17:42 -0500 Subject: Management command cleanup Now that we aren't seeing odd segfaults and hung tasks, we can remove the traceback stuff from the scripts. Also use the 'io' module only, it has been long enough. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index a8875c7e..1adc359e 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -18,7 +18,7 @@ from django.contrib.auth.models import User from django.db import transaction from django.db.models import Q -import codecs +import io import os import re import sys @@ -27,14 +27,6 @@ import logging from datetime import datetime from optparse import make_option -# New in 2.6, but fast (C implementation) in 2.7. We will use it over codecs if -# available. Eventually remove the codecs import completely. -io = None -try: - import io -except ImportError: - pass - from main.models import Arch, Package, PackageDepend, PackageFile, Repo from packages.models import Conflict, Provision, Replacement @@ -74,11 +66,6 @@ class Command(BaseCommand): elif v == 2: logger.level = logging.DEBUG - import signal, traceback - handler = lambda sig, stack: traceback.print_stack(stack) - signal.signal(signal.SIGQUIT, handler) - signal.signal(signal.SIGUSR1, handler) - return read_repo(arch, filename, options) @@ -101,8 +88,7 @@ class Pkg(object): setattr(self, k, None) for k in self.collections: setattr(self, k, ()) - # So we can tell the diffence between a package with no files, and a DB - # without files entries + self.files = None self.has_files = False def populate(self, values): @@ -461,16 +447,13 @@ def parse_repo(repopath): if fname not in dbfiles: continue data_file = repodb.extractfile(tarinfo) - if io is None: - data_file = codecs.EncodedFile(data_file, 'utf-8') - else: - data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), - encoding='utf=8') + data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), + encoding='utf=8') try: data = parse_info(data_file) p = pkgs.setdefault(pkgid, Pkg(reponame)) p.populate(data) - except UnicodeDecodeError, e: + except UnicodeDecodeError: logger.warn("Could not correctly decode %s, skipping file", tarinfo.name) data_file.close() -- cgit v1.2.3-2-g168b From 895f8a20d35a18f3a0cc6e1530eb40292270fc7c Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 9 Jun 2011 17:25:32 -0500 Subject: reporead: allow batching of package updates The real reason I originally added transactions to this code was to prevent half-updates; e.g. a package gets in without the matching depends values. We can safely commit between packages and resume processing the database at a later time. Take advantage of this fact and commit every so often in batch fashion if we have a lot of updates piling up. In the case of updating the files DB, this can really cut down on the need to hold open a long-running, statement heavy transaction and get the information public faster. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 58 +++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 9 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 1adc359e..e9878c93 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -221,6 +221,8 @@ def create_multivalued(dbpkg, repopkg, db_attr, repo_attr): collection.create(name=name) def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): + db_score = 1 + if repopkg.base: dbpkg.pkgbase = repopkg.base else: @@ -251,7 +253,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.last_update = timestamp dbpkg.save() - populate_files(dbpkg, repopkg, force=force) + db_score += populate_files(dbpkg, repopkg, force=force) dbpkg.packagedepend_set.all().delete() for y in repopkg.depends: @@ -272,6 +274,15 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): create_multivalued(dbpkg, repopkg, 'groups', 'groups') create_multivalued(dbpkg, repopkg, 'licenses', 'license') + related_score = (len(repopkg.depends) + len(repopkg.optdepends) + + len(repopkg.conflicts) + len(repopkg.provides) + + len(repopkg.replaces) + len(repopkg.groups) + + len(repopkg.license)) + if related_score: + db_score += (related_score / 20) + 1 + + return db_score + def populate_files(dbpkg, repopkg, force=False): if not force: @@ -280,11 +291,11 @@ def populate_files(dbpkg, repopkg, force=False): logger.info("DB version (%s) didn't match repo version " "(%s) for package %s, skipping file list addition", dbpkg.full_version, repopkg.full_version, dbpkg.pkgname) - return + return 0 if not dbpkg.files_last_update or not dbpkg.last_update: pass elif dbpkg.files_last_update > dbpkg.last_update: - return + return 0 # only delete files if we are reading a DB that contains them if repopkg.has_files: dbpkg.packagefile_set.all().delete() @@ -303,6 +314,28 @@ def populate_files(dbpkg, repopkg, force=False): pkgfile.save(force_insert=True) dbpkg.files_last_update = datetime.utcnow() dbpkg.save() + return (len(repopkg.files) / 50) + 1 + return 0 + + +class Batcher(object): + def __init__(self, threshold, start=0): + self.threshold = threshold + self.meter = start + + def batch_commit(self, score): + """ + Track updates to the database and perform a commit if the batch + becomes sufficiently large. "Large" is defined by waiting for the + sum of scores to exceed the arbitrary threshold value; once it is + hit a commit is issued. + """ + self.meter += score + if self.meter > self.threshold: + logger.debug("Committing transaction, batch threshold hit") + transaction.commit() + self.meter = 0 + @transaction.commit_on_success def db_update(archname, reponame, pkgs, options): @@ -355,19 +388,23 @@ def db_update(archname, reponame, pkgs, options): elif dbpercent < 75.0: logger.warning(msg) + batcher = Batcher(100) + if not filesonly: # packages in syncdb and not in database (add to database) for p in [x for x in pkgs if x.name in in_sync_not_db]: logger.info("Adding package %s", p.name) pkg = Package(pkgname = p.name, arch = architecture, repo = repository) - populate_pkg(pkg, p, timestamp=datetime.utcnow()) + score = populate_pkg(pkg, p, timestamp=datetime.utcnow()) + batcher.batch_commit(score) # packages in database and not in syncdb (remove from database) in_db_not_sync = dbset - syncset for p in in_db_not_sync: - logger.info("Removing package %s from database", p) + logger.info("Removing package %s", p) dbp = dbdict[p] dbp.delete() + batcher.batch_commit(score) # packages in both database and in syncdb (update in database) pkg_in_both = syncset & dbset @@ -385,12 +422,15 @@ def db_update(archname, reponame, pkgs, options): continue else: timestamp = datetime.utcnow() + if filesonly: - logger.debug("Checking files for package %s in database", p.name) - populate_files(dbp, p, force=force) + logger.debug("Checking files for package %s", p.name) + score = populate_files(dbp, p, force=force) else: - logger.info("Updating package %s in database", p.name) - populate_pkg(dbp, p, force=force, timestamp=timestamp) + logger.info("Updating package %s", p.name) + score = populate_pkg(dbp, p, force=force, timestamp=timestamp) + + batcher.batch_commit(score) logger.info('Finished updating Arch: %s', archname) -- cgit v1.2.3-2-g168b From 01b07b5b07cd152949c9f01fec91408945273583 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 9 Jun 2011 17:30:50 -0500 Subject: Fix busted batch score on package removal Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index e9878c93..0bd5587a 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -404,7 +404,7 @@ def db_update(archname, reponame, pkgs, options): logger.info("Removing package %s", p) dbp = dbdict[p] dbp.delete() - batcher.batch_commit(score) + batcher.batch_commit(1) # packages in both database and in syncdb (update in database) pkg_in_both = syncset & dbset -- cgit v1.2.3-2-g168b From 3284b302550dbd255762b8606343288d49402056 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Mon, 13 Jun 2011 13:18:28 -0500 Subject: Add a long out-of-date developer report This shows packages that have been marked out of date for more than 90 days in the repos. Signed-off-by: Dan McGee --- devel/views.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'devel') diff --git a/devel/views.py b/devel/views.py index 25ad2ccf..2b8bd43e 100644 --- a/devel/views.py +++ b/devel/views.py @@ -140,9 +140,14 @@ def report(request, report, username=None): if report == 'old': title = 'Packages last built more than two years ago' - cutoff = datetime.now() - timedelta(days=730) + cutoff = datetime.now() - timedelta(days=365 * 2) packages = packages.filter( build_date__lt=cutoff).order_by('build_date') + elif report == 'long-out-of-date': + title = 'Packages marked out-of-date more than 90 days ago' + cutoff = datetime.now() - timedelta(days=90) + packages = packages.filter( + flag_date__lt=cutoff).order_by('flag_date') elif report == 'big': title = 'Packages with compressed size > 50 MiB' cutoff = 50 * 1024 * 1024 -- cgit v1.2.3-2-g168b From 92dbad587ab77b84130b86153464647d583b677e Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 10 Jun 2011 10:43:57 -0500 Subject: reporead: two small cleanups * Parse builddate when reading from repo database file * Use defaultdict where it comes in handy Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 0bd5587a..4d30388e 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -18,6 +18,7 @@ from django.contrib.auth.models import User from django.db import transaction from django.db.models import Q +from collections import defaultdict import io import os import re @@ -72,7 +73,7 @@ class Command(BaseCommand): class Pkg(object): """An interim 'container' object for holding Arch package data.""" bare = ( 'name', 'base', 'arch', 'desc', 'filename', - 'md5sum', 'url', 'builddate', 'packager' ) + 'md5sum', 'url', 'packager' ) number = ( 'csize', 'isize' ) collections = ( 'depends', 'optdepends', 'conflicts', 'provides', 'replaces', 'groups', 'license', 'files' ) @@ -104,6 +105,16 @@ class Pkg(object): self.rel = match.group(4) if match.group(2): self.epoch = int(match.group(2)) + elif k == 'builddate': + try: + self.builddate = datetime.utcfromtimestamp(int(v[0])) + except ValueError: + try: + self.builddate = datetime.strptime(v[0], + '%a %b %d %H:%M:%S %Y') + except ValueError: + logger.warning('Package %s had unparsable build date %s', + self.name, v[0]) elif k == 'files': self.files = v self.has_files = True @@ -235,15 +246,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.filename = repopkg.filename dbpkg.compressed_size = repopkg.csize dbpkg.installed_size = repopkg.isize - try: - dbpkg.build_date = datetime.utcfromtimestamp(int(repopkg.builddate)) - except ValueError: - try: - dbpkg.build_date = datetime.strptime(repopkg.builddate, - '%a %b %d %H:%M:%S %Y') - except ValueError: - logger.warning('Package %s had unparsable build date %s', - repopkg.name, repopkg.builddate) + dbpkg.build_date = repopkg.builddate dbpkg.packager_str = repopkg.packager # attempt to find the corresponding django user for this string dbpkg.packager = find_user(repopkg.packager) @@ -394,7 +397,7 @@ def db_update(archname, reponame, pkgs, options): # packages in syncdb and not in database (add to database) for p in [x for x in pkgs if x.name in in_sync_not_db]: logger.info("Adding package %s", p.name) - pkg = Package(pkgname = p.name, arch = architecture, repo = repository) + pkg = Package(pkgname=p.name, arch=architecture, repo=repository) score = populate_pkg(pkg, p, timestamp=datetime.utcnow()) batcher.batch_commit(score) @@ -480,7 +483,8 @@ def parse_repo(repopath): repodb = tarfile.open(repopath, "r") logger.debug("Starting package parsing") dbfiles = ('desc', 'depends', 'files') - pkgs = {} + newpkg = lambda: Pkg(reponame) + pkgs = defaultdict(newpkg) for tarinfo in repodb.getmembers(): if tarinfo.isreg(): pkgid, fname = os.path.split(tarinfo.name) @@ -490,9 +494,7 @@ def parse_repo(repopath): data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), encoding='utf=8') try: - data = parse_info(data_file) - p = pkgs.setdefault(pkgid, Pkg(reponame)) - p.populate(data) + pkgs[pkgid].populate(parse_info(data_file)) except UnicodeDecodeError: logger.warn("Could not correctly decode %s, skipping file", tarinfo.name) -- cgit v1.2.3-2-g168b From b336dd15598132d1c501a9d44bc4d5a0e64bfb2e Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 10 Jun 2011 10:46:06 -0500 Subject: reporead: small memory/perf improvements Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 4d30388e..baf7fee1 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -116,11 +116,11 @@ class Pkg(object): logger.warning('Package %s had unparsable build date %s', self.name, v[0]) elif k == 'files': - self.files = v + self.files = tuple(v) self.has_files = True else: # anything left in collections - setattr(self, k, v) + setattr(self, k, tuple(v)) @property def full_version(self): @@ -528,7 +528,9 @@ def read_repo(primary_arch, repo_file, options): else: # we don't include mis-arched packages logger.warning("Package %s arch = %s", - package.name,package.arch) + package.name, package.arch) + del packages + logger.info('Starting database updates.') for arch in sorted(packages_arches.keys()): db_update(arch, repo, packages_arches[arch], options) -- cgit v1.2.3-2-g168b From f252aede11319c694f3a81626b996403eb14444f Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 14 Jun 2011 19:18:14 -0500 Subject: Add named devel URLs Signed-off-by: Dan McGee --- devel/urls.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'devel') diff --git a/devel/urls.py b/devel/urls.py index 8759562e..bc3bcace 100644 --- a/devel/urls.py +++ b/devel/urls.py @@ -3,8 +3,8 @@ from django.conf.urls.defaults import patterns urlpatterns = patterns('devel.views', (r'^admin_log/$','admin_log'), (r'^admin_log/(?P.*)/$','admin_log'), - (r'^clock/$', 'clock'), - (r'^$', 'index'), + (r'^clock/$', 'clock', {}, 'devel-clocks'), + (r'^$', 'index', {}, 'devel-index'), (r'^newuser/$', 'new_user_form'), (r'^profile/$', 'change_profile'), (r'^reports/(?P.*)/(?P.*)/$', 'report'), -- cgit v1.2.3-2-g168b From 4a9b6867a3a2786435316ab7deefa54257bb931d Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 15 Jun 2011 15:50:14 -0500 Subject: Refactor common select_related into manager method For a Package object query, we almost always did .select_related('arch', 'repo). Refactor this into the manager as a 'normal()' method so we can avoid sprinkling the same logic everywhere. Signed-off-by: Dan McGee --- devel/views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'devel') diff --git a/devel/views.py b/devel/views.py index 2b8bd43e..1827f2ac 100644 --- a/devel/views.py +++ b/devel/views.py @@ -32,7 +32,7 @@ from string import ascii_letters, digits def index(request): '''the developer dashboard''' inner_q = PackageRelation.objects.filter(user=request.user).values('pkgbase') - flagged = Package.objects.select_related('arch', 'repo').filter( + flagged = Package.objects.normal().filter( flag_date__isnull=False, pkgbase__in=inner_q).order_by('pkgname') todopkgs = TodolistPkg.objects.select_related( @@ -135,7 +135,7 @@ def change_profile(request): @login_required def report(request, report, username=None): title = 'Developer Report' - packages = Package.objects.select_related('arch', 'repo') + packages = Package.objects.normal() names = attrs = user = None if report == 'old': -- cgit v1.2.3-2-g168b From aaae7e599eeedbc7e1f23d2da1588ad4edcd0f5f Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 15 Jun 2011 15:53:41 -0500 Subject: Fix out of date test A version of this view is now publicly available, so it returns 200. Signed-off-by: Dan McGee --- devel/tests.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'devel') diff --git a/devel/tests.py b/devel/tests.py index da5459d6..604da74c 100644 --- a/devel/tests.py +++ b/devel/tests.py @@ -26,7 +26,4 @@ class DevelTest(TestCase): def test_mirrors(self): response = self.client.get('/mirrors/') - self.assertEqual(response.status_code, 302) - self.assertEqual(response.has_header('Location'), True) - self.assertEqual(response['location'], - 'http://testserver/login/?next=/mirrors/') + self.assertEqual(response.status_code, 200) -- cgit v1.2.3-2-g168b From da20949c8cc185e91dbaae1b8369fcffa3447081 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 23 Jun 2011 19:11:12 -0500 Subject: Move find_user method to devel utils This could be handy elsewhere as well, and it is loosely coupled to anything else in reporead. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 51 +----------------------------- devel/utils.py | 58 +++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 50 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index baf7fee1..138931ff 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -16,7 +16,6 @@ Example: from django.core.management.base import BaseCommand, CommandError from django.contrib.auth.models import User from django.db import transaction -from django.db.models import Q from collections import defaultdict import io @@ -28,6 +27,7 @@ import logging from datetime import datetime from optparse import make_option +from devel.utils import find_user from main.models import Arch, Package, PackageDepend, PackageFile, Repo from packages.models import Conflict, Provision, Replacement @@ -130,55 +130,6 @@ class Pkg(object): return u'%s-%s' % (self.ver, self.rel) -def find_user(userstring): - ''' - Attempt to find the corresponding User object for a standard - packager string, e.g. something like - 'A. U. Thor '. - We start by searching for a matching email address; we then move onto - matching by first/last name. If we cannot find a user, then return None. - ''' - if userstring in find_user.cache: - return find_user.cache[userstring] - matches = re.match(r'^([^<]+)? ?<([^>]*)>', userstring) - if not matches: - return None - - user = None - name = matches.group(1) - email = matches.group(2) - - def user_email(): - return User.objects.get(email=email) - def profile_email(): - return User.objects.get(userprofile__public_email=email) - def user_name(): - # yes, a bit odd but this is the easiest way since we can't always be - # sure how to split the name. Ensure every 'token' appears in at least - # one of the two name fields. - name_q = Q() - for token in name.split(): - # ignore quoted parts; e.g. nicknames in strings - if re.match(r'^[\'"].*[\'"]$', token): - continue - name_q &= (Q(first_name__icontains=token) | - Q(last_name__icontains=token)) - return User.objects.get(name_q) - - for matcher in (user_email, profile_email, user_name): - try: - user = matcher() - break - except (User.DoesNotExist, User.MultipleObjectsReturned): - pass - - find_user.cache[userstring] = user - return user - -# cached mappings of user strings -> User objects so we don't have to do the -# lookup more than strictly necessary. -find_user.cache = {} - DEPEND_RE = re.compile(r"^(.+?)((>=|<=|=|>|<)(.*))?$") def create_depend(package, dep_str, optional=False): diff --git a/devel/utils.py b/devel/utils.py index abfdabe5..9d7dfb2d 100644 --- a/devel/utils.py +++ b/devel/utils.py @@ -1,7 +1,11 @@ +import re + from django.contrib.auth.models import User from django.db import connection +from django.db.models import Count, Q from main.utils import cache_function +from main.models import Package from packages.models import PackageRelation @cache_function(300) @@ -28,10 +32,64 @@ SELECT pr.user_id, COUNT(*), COUNT(p.flag_date) pkg_count[k] = total flag_count[k] = flagged + update_count = Package.objects.values_list('packager').annotate( + Count('packager')) + update_count = dict(update_count) + for m in maintainers: m.package_count = pkg_count.get(m.id, 0) m.flagged_count = flag_count.get(m.id, 0) + m.updated_count = update_count.get(m.id, 0) return maintainers +def find_user(userstring): + ''' + Attempt to find the corresponding User object for a standard + packager string, e.g. something like + 'A. U. Thor '. + We start by searching for a matching email address; we then move onto + matching by first/last name. If we cannot find a user, then return None. + ''' + if userstring in find_user.cache: + return find_user.cache[userstring] + matches = re.match(r'^([^<]+)? ?<([^>]*)>', userstring) + if not matches: + return None + + user = None + name = matches.group(1) + email = matches.group(2) + + def user_email(): + return User.objects.get(email=email) + def profile_email(): + return User.objects.get(userprofile__public_email=email) + def user_name(): + # yes, a bit odd but this is the easiest way since we can't always be + # sure how to split the name. Ensure every 'token' appears in at least + # one of the two name fields. + name_q = Q() + for token in name.split(): + # ignore quoted parts; e.g. nicknames in strings + if re.match(r'^[\'"].*[\'"]$', token): + continue + name_q &= (Q(first_name__icontains=token) | + Q(last_name__icontains=token)) + return User.objects.get(name_q) + + for matcher in (user_email, profile_email, user_name): + try: + user = matcher() + break + except (User.DoesNotExist, User.MultipleObjectsReturned): + pass + + find_user.cache[userstring] = user + return user + +# cached mappings of user strings -> User objects so we don't have to do the +# lookup more than strictly necessary. +find_user.cache = {} + # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From 26c54d017185b1c409dbd6ed4c09fb14986df0b3 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 23 Jun 2011 19:35:47 -0500 Subject: find_user: add tests and fix no email address case If a packager string was passed in without an email address, we would blow up on the matcher and not try to find a user. Signed-off-by: Dan McGee --- devel/tests.py | 45 ++++++++++++++++++++++++++++++++++++++++++++- devel/utils.py | 26 ++++++++++++++++++-------- 2 files changed, 62 insertions(+), 9 deletions(-) (limited to 'devel') diff --git a/devel/tests.py b/devel/tests.py index 604da74c..33b02582 100644 --- a/devel/tests.py +++ b/devel/tests.py @@ -1,8 +1,10 @@ from django.test import TestCase +from django.contrib.auth.models import User +from devel.utils import find_user +from main.models import UserProfile class DevelTest(TestCase): - def test_index(self): response = self.client.get('/devel/') self.assertEqual(response.status_code, 302) @@ -27,3 +29,44 @@ class DevelTest(TestCase): def test_mirrors(self): response = self.client.get('/mirrors/') self.assertEqual(response.status_code, 200) + +class FindUserTest(TestCase): + + def setUp(self): + self.user1 = User.objects.create(username="joeuser", first_name="Joe", + last_name="User", email="user1@example.com") + self.user2 = User.objects.create(username="john", first_name="John", + last_name="", email="user2@example.com") + self.user3 = User.objects.create(username="bjones", first_name="Bob", + last_name="Jones", email="user3@example.com") + + for user in (self.user1, self.user2, self.user3): + email_addr = "%s@awesome.com" % user.username + UserProfile.objects.create(user=user, public_email=email_addr) + + def test_not_matching(self): + self.assertIsNone(find_user(None)) + self.assertIsNone(find_user("")) + self.assertIsNone(find_user("Bogus")) + self.assertIsNone(find_user("Bogus ")) + self.assertIsNone(find_user("")) + self.assertIsNone(find_user("bogus@example.com")) + self.assertIsNone(find_user("Unknown Packager")) + + def test_by_email(self): + self.assertEqual(self.user1, find_user("XXX YYY ")) + self.assertEqual(self.user2, find_user("YYY ZZZ ")) + + def test_by_profile_email(self): + self.assertEqual(self.user1, find_user("XXX ")) + self.assertEqual(self.user2, find_user("YYY ")) + self.assertEqual(self.user3, find_user("ZZZ ")) + + def test_by_name(self): + self.assertEqual(self.user1, find_user("Joe User ")) + self.assertEqual(self.user1, find_user("Joe User")) + self.assertEqual(self.user2, find_user("John ")) + self.assertEqual(self.user3, find_user("Bob Jones ")) + +# vim: set ts=4 sw=4 et: diff --git a/devel/utils.py b/devel/utils.py index 9d7dfb2d..acdda959 100644 --- a/devel/utils.py +++ b/devel/utils.py @@ -51,24 +51,32 @@ def find_user(userstring): We start by searching for a matching email address; we then move onto matching by first/last name. If we cannot find a user, then return None. ''' + if not userstring: + return None if userstring in find_user.cache: return find_user.cache[userstring] matches = re.match(r'^([^<]+)? ?<([^>]*)>', userstring) if not matches: - return None - - user = None - name = matches.group(1) - email = matches.group(2) + name = userstring + email = None + else: + name = matches.group(1) + email = matches.group(2) def user_email(): - return User.objects.get(email=email) + if email: + return User.objects.get(email=email) + return None def profile_email(): - return User.objects.get(userprofile__public_email=email) + if email: + return User.objects.get(userprofile__public_email=email) + return None def user_name(): # yes, a bit odd but this is the easiest way since we can't always be # sure how to split the name. Ensure every 'token' appears in at least # one of the two name fields. + if not name: + return None name_q = Q() for token in name.split(): # ignore quoted parts; e.g. nicknames in strings @@ -78,10 +86,12 @@ def find_user(userstring): Q(last_name__icontains=token)) return User.objects.get(name_q) + user = None for matcher in (user_email, profile_email, user_name): try: user = matcher() - break + if user != None: + break except (User.DoesNotExist, User.MultipleObjectsReturned): pass -- cgit v1.2.3-2-g168b From 9156003d2d93de57c663901c39ac66316a3d969e Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 23 Jun 2011 19:50:46 -0500 Subject: Turn find_user into UserFinder class This moves the cache inside an instance. Also add a few more tests. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 6 ++- devel/tests.py | 69 ++++++++++++++++++++-------- devel/utils.py | 86 +++++++++++++++++++---------------- 3 files changed, 103 insertions(+), 58 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 138931ff..470b785d 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -27,7 +27,7 @@ import logging from datetime import datetime from optparse import make_option -from devel.utils import find_user +from devel.utils import UserFinder from main.models import Arch, Package, PackageDepend, PackageFile, Repo from packages.models import Conflict, Provision, Replacement @@ -182,6 +182,8 @@ def create_multivalued(dbpkg, repopkg, db_attr, repo_attr): for name in getattr(repopkg, repo_attr): collection.create(name=name) +finder = UserFinder() + def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): db_score = 1 @@ -200,7 +202,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.build_date = repopkg.builddate dbpkg.packager_str = repopkg.packager # attempt to find the corresponding django user for this string - dbpkg.packager = find_user(repopkg.packager) + dbpkg.packager = finder.find(repopkg.packager) if timestamp: dbpkg.flag_date = None diff --git a/devel/tests.py b/devel/tests.py index 33b02582..c982e502 100644 --- a/devel/tests.py +++ b/devel/tests.py @@ -1,7 +1,7 @@ from django.test import TestCase from django.contrib.auth.models import User -from devel.utils import find_user +from devel.utils import UserFinder from main.models import UserProfile class DevelTest(TestCase): @@ -33,6 +33,8 @@ class DevelTest(TestCase): class FindUserTest(TestCase): def setUp(self): + self.finder = UserFinder() + self.user1 = User.objects.create(username="joeuser", first_name="Joe", last_name="User", email="user1@example.com") self.user2 = User.objects.create(username="john", first_name="John", @@ -44,29 +46,60 @@ class FindUserTest(TestCase): email_addr = "%s@awesome.com" % user.username UserProfile.objects.create(user=user, public_email=email_addr) + self.user4 = User.objects.create(username="tim1", first_name="Tim", + last_name="One", email="tim@example.com") + self.user5 = User.objects.create(username="tim2", first_name="Tim", + last_name="Two", email="timtwo@example.com") + def test_not_matching(self): - self.assertIsNone(find_user(None)) - self.assertIsNone(find_user("")) - self.assertIsNone(find_user("Bogus")) - self.assertIsNone(find_user("Bogus ")) - self.assertIsNone(find_user("")) - self.assertIsNone(find_user("bogus@example.com")) - self.assertIsNone(find_user("Unknown Packager")) + self.assertIsNone(self.finder.find(None)) + self.assertIsNone(self.finder.find("")) + self.assertIsNone(self.finder.find("Bogus")) + self.assertIsNone(self.finder.find("Bogus ")) + self.assertIsNone(self.finder.find("")) + self.assertIsNone(self.finder.find("bogus@example.com")) + self.assertIsNone(self.finder.find("Unknown Packager")) def test_by_email(self): - self.assertEqual(self.user1, find_user("XXX YYY ")) - self.assertEqual(self.user2, find_user("YYY ZZZ ")) + self.assertEqual(self.user1, + self.finder.find("XXX YYY ")) + self.assertEqual(self.user2, + self.finder.find("YYY ZZZ ")) def test_by_profile_email(self): - self.assertEqual(self.user1, find_user("XXX ")) - self.assertEqual(self.user2, find_user("YYY ")) - self.assertEqual(self.user3, find_user("ZZZ ")) + self.assertEqual(self.user1, + self.finder.find("XXX ")) + self.assertEqual(self.user2, + self.finder.find("YYY ")) + self.assertEqual(self.user3, + self.finder.find("ZZZ ")) def test_by_name(self): - self.assertEqual(self.user1, find_user("Joe User ")) - self.assertEqual(self.user1, find_user("Joe User")) - self.assertEqual(self.user2, find_user("John ")) - self.assertEqual(self.user3, find_user("Bob Jones ")) + self.assertEqual(self.user1, + self.finder.find("Joe User ")) + self.assertEqual(self.user1, + self.finder.find("Joe User")) + self.assertEqual(self.user2, + self.finder.find("John ")) + self.assertEqual(self.user2, + self.finder.find("John")) + self.assertEqual(self.user3, + self.finder.find("Bob Jones ")) + + def test_cache(self): + # simply look two of them up, but then do it repeatedly + for i in range(50): + self.assertEqual(self.user1, + self.finder.find("XXX YYY ")) + self.assertEqual(self.user3, + self.finder.find("Bob Jones ")) + + def test_ambiguous(self): + self.assertEqual(self.user4, + self.finder.find("Tim One ")) + self.assertEqual(self.user5, + self.finder.find("Tim Two ")) + self.assertIsNone(self.finder.find("Tim ")) # vim: set ts=4 sw=4 et: diff --git a/devel/utils.py b/devel/utils.py index acdda959..3a6ad699 100644 --- a/devel/utils.py +++ b/devel/utils.py @@ -43,35 +43,25 @@ SELECT pr.user_id, COUNT(*), COUNT(p.flag_date) return maintainers -def find_user(userstring): - ''' - Attempt to find the corresponding User object for a standard - packager string, e.g. something like - 'A. U. Thor '. - We start by searching for a matching email address; we then move onto - matching by first/last name. If we cannot find a user, then return None. - ''' - if not userstring: - return None - if userstring in find_user.cache: - return find_user.cache[userstring] - matches = re.match(r'^([^<]+)? ?<([^>]*)>', userstring) - if not matches: - name = userstring - email = None - else: - name = matches.group(1) - email = matches.group(2) - - def user_email(): + +class UserFinder(object): + def __init__(self): + self.cache = {} + + @staticmethod + def user_email(name, email): if email: return User.objects.get(email=email) return None - def profile_email(): + + @staticmethod + def profile_email(name, email): if email: return User.objects.get(userprofile__public_email=email) return None - def user_name(): + + @staticmethod + def user_name(name, email): # yes, a bit odd but this is the easiest way since we can't always be # sure how to split the name. Ensure every 'token' appears in at least # one of the two name fields. @@ -86,20 +76,40 @@ def find_user(userstring): Q(last_name__icontains=token)) return User.objects.get(name_q) - user = None - for matcher in (user_email, profile_email, user_name): - try: - user = matcher() - if user != None: - break - except (User.DoesNotExist, User.MultipleObjectsReturned): - pass - - find_user.cache[userstring] = user - return user - -# cached mappings of user strings -> User objects so we don't have to do the -# lookup more than strictly necessary. -find_user.cache = {} + def find(self, userstring): + ''' + Attempt to find the corresponding User object for a standard + packager string, e.g. something like + 'A. U. Thor '. + We start by searching for a matching email address; we then move onto + matching by first/last name. If we cannot find a user, then return None. + ''' + if not userstring: + return None + if userstring in self.cache: + return self.cache[userstring] + matches = re.match(r'^([^<]+)? ?<([^>]*)>', userstring) + if not matches: + name = userstring + email = None + else: + name = matches.group(1) + email = matches.group(2) + + user = None + find_methods = (self.user_email, self.profile_email, self.user_name) + for matcher in find_methods: + try: + user = matcher(name, email) + if user != None: + break + except (User.DoesNotExist, User.MultipleObjectsReturned): + pass + + self.cache[userstring] = user + return user + + def clear_cache(self): + self.cache = {} # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From 82289ebb4432b3372b959430581afa0a2158acb9 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 23 Jun 2011 20:11:07 -0500 Subject: Add a rematch_packager management command This allows quick resolution of all unmatched packages, especially after tweaking the way find_user works. Signed-off-by: Dan McGee --- devel/management/commands/rematch_packager.py | 64 +++++++++++++++++++++++++++ devel/tests.py | 12 ++++- devel/utils.py | 8 ++-- 3 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 devel/management/commands/rematch_packager.py (limited to 'devel') diff --git a/devel/management/commands/rematch_packager.py b/devel/management/commands/rematch_packager.py new file mode 100644 index 00000000..ba6e6a54 --- /dev/null +++ b/devel/management/commands/rematch_packager.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +""" +rematch_packager command + +Match all packages with a packager_str but NULL packager_id to a packager if we +can find one. + +Usage: ./manage.py rematch_packager +""" + +from django.core.management.base import NoArgsCommand + +import sys +import logging + +from devel.utils import UserFinder +from main.models import Package + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s -> %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + stream=sys.stderr) +logger = logging.getLogger() + +class Command(NoArgsCommand): + help = "Runs a check on all active mirror URLs to determine if they are reachable via IPv4 and/or v6." + + def handle_noargs(self, **options): + v = int(options.get('verbosity', None)) + if v == 0: + logger.level = logging.ERROR + elif v == 1: + logger.level = logging.INFO + elif v == 2: + logger.level = logging.DEBUG + + return match_packager() + +def match_packager(): + finder = UserFinder() + logger.info("getting all unmatched packages") + package_count = matched_count = 0 + unknown = set() + + for package in Package.objects.filter(packager__isnull=True): + logger.debug("package %s, packager string %s", + package.pkgname, package.packager_str) + package_count += 1 + user = finder.find(package.packager_str) + if user: + package.packager = user + logger.debug(" found user %s" % user.username) + package.save() + matched_count += 1 + else: + unknown.add(package.packager_str) + + logger.info("%d packages checked, %d newly matched", + package_count, matched_count) + logger.debug("unknown packagers:\n%s", + "\n".join(unknown)) + +# vim: set ts=4 sw=4 et: diff --git a/devel/tests.py b/devel/tests.py index c982e502..36691179 100644 --- a/devel/tests.py +++ b/devel/tests.py @@ -1,7 +1,7 @@ from django.test import TestCase from django.contrib.auth.models import User -from devel.utils import UserFinder +from devel.utils import UserFinder from main.models import UserProfile class DevelTest(TestCase): @@ -87,6 +87,16 @@ class FindUserTest(TestCase): self.assertEqual(self.user3, self.finder.find("Bob Jones ")) + def test_by_invalid(self): + self.assertEqual(self.user1, + self.finder.find("Joe User ]*)>', userstring) + + name = email = None + + matches = re.match(r'^([^<]+)? ?<([^>]*)>?', userstring) if not matches: - name = userstring - email = None + name = userstring.strip() else: name = matches.group(1) email = matches.group(2) -- cgit v1.2.3-2-g168b From dcbb859a259082bf8d0587a63385ece44c697e45 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 23 Jun 2011 20:13:01 -0500 Subject: Add (hidden) ability to search by last packager This is used from the developer dashboard to add a new column to the stats of # of packages for a given developer where they were the last to do the packaging. Signed-off-by: Dan McGee --- devel/views.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'devel') diff --git a/devel/views.py b/devel/views.py index 1827f2ac..4399b73f 100644 --- a/devel/views.py +++ b/devel/views.py @@ -50,9 +50,11 @@ def index(request): total_orphans = Package.objects.exclude(pkgbase__in=maintained).count() total_flagged_orphans = Package.objects.filter( flag_date__isnull=False).exclude(pkgbase__in=maintained).count() + total_updated = Package.objects.filter(packager__isnull=True).count() orphan = { 'package_count': total_orphans, 'flagged_count': total_flagged_orphans, + 'updated_count': total_updated, } page_dict = { -- cgit v1.2.3-2-g168b From f913bbcab4dc458d1566778a094bdd337cb91841 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 28 Jun 2011 00:11:49 -0500 Subject: Add order_by to packager count query No real idea why SQLite is returning wrong results without out this, but it is likely a bug in the ORM layer I'm not interested in digging into. Signed-off-by: Dan McGee --- devel/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'devel') diff --git a/devel/utils.py b/devel/utils.py index 6bc52c89..d7a154a8 100644 --- a/devel/utils.py +++ b/devel/utils.py @@ -32,8 +32,8 @@ SELECT pr.user_id, COUNT(*), COUNT(p.flag_date) pkg_count[k] = total flag_count[k] = flagged - update_count = Package.objects.values_list('packager').annotate( - Count('packager')) + update_count = Package.objects.values_list('packager').order_by( + 'packager').annotate(Count('packager')) update_count = dict(update_count) for m in maintainers: -- cgit v1.2.3-2-g168b From d3f1763efefef9ff5095a49b075b27b38df83d16 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 28 Jun 2011 00:12:45 -0500 Subject: Add a bad compression ratio report Signed-off-by: Dan McGee --- devel/views.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'devel') diff --git a/devel/views.py b/devel/views.py index 4399b73f..79eef318 100644 --- a/devel/views.py +++ b/devel/views.py @@ -6,7 +6,7 @@ from django.contrib.auth.models import User, Group from django.contrib.sites.models import Site from django.core.mail import send_mail from django.db import transaction -from django.db.models import Q +from django.db.models import F, Q from django.http import Http404 from django.shortcuts import get_object_or_404 from django.template import loader, Context @@ -163,6 +163,23 @@ def report(request, report, username=None): package.compressed_size) package.installed_size_pretty = filesizeformat( package.installed_size) + elif report == 'badcompression': + title = 'Packages that have little need for compression' + cutoff = 0.90 * F('installed_size') + packages = packages.filter(compressed_size__gt=0, installed_size__gt=0, + compressed_size__gte=cutoff).order_by('-compressed_size') + names = [ 'Compressed Size', 'Installed Size', 'Ratio', 'Type' ] + attrs = [ 'compressed_size_pretty', 'installed_size_pretty', + 'ratio', 'compress_type' ] + # Format the compressed and installed sizes with MB/GB/etc suffixes + for package in packages: + package.compressed_size_pretty = filesizeformat( + package.compressed_size) + package.installed_size_pretty = filesizeformat( + package.installed_size) + ratio = package.compressed_size / float(package.installed_size) + package.ratio = '%.2f' % ratio + package.compress_type = package.filename.split('.')[-1] elif report == 'uncompressed-man': title = 'Packages with uncompressed manpages' # magic going on here! Checking for all '.1'...'.9' extensions -- cgit v1.2.3-2-g168b From b6f86d9ab0d6910f0f70398b07e965d337bd9e78 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 16 Aug 2011 16:04:16 -0500 Subject: Add two new DB fields to reporead Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 470b785d..97fdbb73 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -73,7 +73,7 @@ class Command(BaseCommand): class Pkg(object): """An interim 'container' object for holding Arch package data.""" bare = ( 'name', 'base', 'arch', 'desc', 'filename', - 'md5sum', 'url', 'packager' ) + 'md5sum', 'sha256sum', 'pgpsig', 'url', 'packager' ) number = ( 'csize', 'isize' ) collections = ( 'depends', 'optdepends', 'conflicts', 'provides', 'replaces', 'groups', 'license', 'files' ) -- cgit v1.2.3-2-g168b From e5d09fb7e9003b7f96685af9c0a722b45746448e Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 17 Aug 2011 16:18:12 -0500 Subject: Add PGP signature package field And add eventual display code for it to the details template, but don't show it yet as no packages will have it. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 1 + 1 file changed, 1 insertion(+) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 97fdbb73..cf597577 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -203,6 +203,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.packager_str = repopkg.packager # attempt to find the corresponding django user for this string dbpkg.packager = finder.find(repopkg.packager) + dbpkg.pgp_signature = repopkg.pgpsig if timestamp: dbpkg.flag_date = None -- cgit v1.2.3-2-g168b From 81884005d8496e12f9636e872eaedff33af77e30 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 5 Oct 2011 10:33:54 -0500 Subject: Allow developer index to work with a non-authenticated user This is not the normal case given the decorator on the view, but during testing and development it is sometimes useful so others don't have to log in over a non-secure connection to check things out. Signed-off-by: Dan McGee --- devel/views.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'devel') diff --git a/devel/views.py b/devel/views.py index 79eef318..ebae3b32 100644 --- a/devel/views.py +++ b/devel/views.py @@ -31,7 +31,12 @@ from string import ascii_letters, digits @never_cache def index(request): '''the developer dashboard''' - inner_q = PackageRelation.objects.filter(user=request.user).values('pkgbase') + if(request.user.is_authenticated()): + inner_q = PackageRelation.objects.filter(user=request.user) + else: + inner_q = PackageRelation.objects.none() + inner_q = inner_q.values('pkgbase') + flagged = Package.objects.normal().filter( flag_date__isnull=False, pkgbase__in=inner_q).order_by('pkgname') -- cgit v1.2.3-2-g168b From 2d5777b11d229d115a31a6c82236570002c2dd57 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 21 Oct 2011 18:49:00 -0500 Subject: Add a generate_keyring command This grabs all the PGP keys from the developer profiles and adds them to the keyrings. Obviously we may want to do more in the future such as filter by groups, active status, etc. but this is just a first iteration. Signed-off-by: Dan McGee --- devel/management/commands/generate_keyring.py | 59 +++++++++++++++++++++++++++ devel/management/commands/rematch_packager.py | 2 +- 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 devel/management/commands/generate_keyring.py (limited to 'devel') diff --git a/devel/management/commands/generate_keyring.py b/devel/management/commands/generate_keyring.py new file mode 100644 index 00000000..b95d5a8e --- /dev/null +++ b/devel/management/commands/generate_keyring.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +""" +generate_keyring command + +Assemble a GPG keyring with all known developer keys. + +Usage: ./manage.py generate_keyring +""" + +from django.core.management.base import BaseCommand, CommandError +from django.db.models import Q + +import logging +import subprocess +import sys + +from main.models import UserProfile + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s -> %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + stream=sys.stderr) +logger = logging.getLogger() + +class Command(BaseCommand): + args = " " + help = "Assemble a GPG keyring with all known developer keys." + + def handle(self, *args, **options): + v = int(options.get('verbosity', None)) + if v == 0: + logger.level = logging.ERROR + elif v == 1: + logger.level = logging.INFO + elif v == 2: + logger.level = logging.DEBUG + + if len(args) != 2: + raise CommandError("keyserver and keyring_path must be provided") + + return generate_keyring(args[0], args[1]) + +def generate_keyring(keyserver, keyring): + logger.info("getting all known key IDs") + + exclude = Q(pgp_key__isnull=True) & Q(pgp_key__exact="") + key_ids = UserProfile.objects.exclude( + exclude).values_list("pgp_key", flat=True) + logger.info("%d keys fetched from user profiles", len(key_ids)) + + gpg_cmd = ["gpg", "--no-default-keyring", "--keyring", keyring, + "--keyserver", keyserver, "--recv-keys"] + logger.info("running command: %r", gpg_cmd) + gpg_cmd.extend(key_ids) + subprocess.check_call(gpg_cmd) + logger.info("keyring at %s successfully updated", keyring) + +# vim: set ts=4 sw=4 et: diff --git a/devel/management/commands/rematch_packager.py b/devel/management/commands/rematch_packager.py index ba6e6a54..461d83ab 100644 --- a/devel/management/commands/rematch_packager.py +++ b/devel/management/commands/rematch_packager.py @@ -24,7 +24,7 @@ logging.basicConfig( logger = logging.getLogger() class Command(NoArgsCommand): - help = "Runs a check on all active mirror URLs to determine if they are reachable via IPv4 and/or v6." + help = "Match all packages with a packager_str but NULL packager_id to a packager if we can find one." def handle_noargs(self, **options): v = int(options.get('verbosity', None)) -- cgit v1.2.3-2-g168b From 1b83844b30d3271b8fb50757d827c7b8fe8b5585 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 26 Oct 2011 03:25:15 -0500 Subject: Ensure PGP signature values are not trimmed This makes them totally unusable for any real purpose down the road. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index cf597577..a8e3219e 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -73,7 +73,7 @@ class Command(BaseCommand): class Pkg(object): """An interim 'container' object for holding Arch package data.""" bare = ( 'name', 'base', 'arch', 'desc', 'filename', - 'md5sum', 'sha256sum', 'pgpsig', 'url', 'packager' ) + 'md5sum', 'sha256sum', 'url', 'packager' ) number = ( 'csize', 'isize' ) collections = ( 'depends', 'optdepends', 'conflicts', 'provides', 'replaces', 'groups', 'license', 'files' ) @@ -85,6 +85,7 @@ class Pkg(object): self.ver = None self.rel = None self.epoch = 0 + self.pgpsig = None for k in self.bare + self.number: setattr(self, k, None) for k in self.collections: @@ -99,6 +100,9 @@ class Pkg(object): setattr(self, k, v[0][:254]) elif k in self.number: setattr(self, k, long(v[0])) + elif k == 'pgpsig': + # do NOT prune this value at all + setattr(self, k, v[0]) elif k == 'version': match = self.version_re.match(v[0]) self.ver = match.group(3) -- cgit v1.2.3-2-g168b From ade2c08899abf77f6d836432c9988ad3f9652a95 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 1 Nov 2011 16:47:37 -0500 Subject: Really ensure we don't catch any NULL or blank values Fuck you too, Django. Signed-off-by: Dan McGee --- devel/management/commands/generate_keyring.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/generate_keyring.py b/devel/management/commands/generate_keyring.py index b95d5a8e..35ab8874 100644 --- a/devel/management/commands/generate_keyring.py +++ b/devel/management/commands/generate_keyring.py @@ -8,7 +8,6 @@ Usage: ./manage.py generate_keyring """ from django.core.management.base import BaseCommand, CommandError -from django.db.models import Q import logging import subprocess @@ -44,9 +43,10 @@ class Command(BaseCommand): def generate_keyring(keyserver, keyring): logger.info("getting all known key IDs") - exclude = Q(pgp_key__isnull=True) & Q(pgp_key__exact="") - key_ids = UserProfile.objects.exclude( - exclude).values_list("pgp_key", flat=True) + # Screw you Django, for not letting one natively do value != + key_ids = UserProfile.objects.filter(user__is_active=True, + pgp_key__isnull=False).extra(where=["pgp_key != ''"]).values_list( + "pgp_key", flat=True) logger.info("%d keys fetched from user profiles", len(key_ids)) gpg_cmd = ["gpg", "--no-default-keyring", "--keyring", keyring, -- cgit v1.2.3-2-g168b From d8e34919811728149a12e30d438318a3c1036a83 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 3 Nov 2011 15:50:06 -0500 Subject: Use UTC now everywhere Signed-off-by: Dan McGee --- devel/views.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'devel') diff --git a/devel/views.py b/devel/views.py index ebae3b32..694ed6dc 100644 --- a/devel/views.py +++ b/devel/views.py @@ -80,9 +80,9 @@ def clock(request): devs = User.objects.filter(is_active=True).order_by( 'username').select_related('userprofile') - # now annotate each dev object with their current time now = datetime.now() utc_now = datetime.utcnow().replace(tzinfo=pytz.utc) + # now annotate each dev object with their current time for dev in devs: tz = pytz.timezone(dev.userprofile.time_zone) dev.current_time = utc_now.astimezone(tz) @@ -147,12 +147,12 @@ def report(request, report, username=None): if report == 'old': title = 'Packages last built more than two years ago' - cutoff = datetime.now() - timedelta(days=365 * 2) + cutoff = datetime.utcnow() - timedelta(days=365 * 2) packages = packages.filter( build_date__lt=cutoff).order_by('build_date') elif report == 'long-out-of-date': title = 'Packages marked out-of-date more than 90 days ago' - cutoff = datetime.now() - timedelta(days=90) + cutoff = datetime.utcnow() - timedelta(days=90) packages = packages.filter( flag_date__lt=cutoff).order_by('flag_date') elif report == 'big': -- cgit v1.2.3-2-g168b From 022692b3f33de8c45741d3cb27fa95f9f6facdea Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 11 Nov 2011 10:43:18 -0600 Subject: Show relevant signoffs on dashboard Signed-off-by: Dan McGee --- devel/views.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'devel') diff --git a/devel/views.py b/devel/views.py index 694ed6dc..0002df04 100644 --- a/devel/views.py +++ b/devel/views.py @@ -18,6 +18,7 @@ from main.models import Package, PackageDepend, PackageFile, TodolistPkg from main.models import Arch, Repo from main.models import UserProfile from packages.models import PackageRelation +from packages.utils import get_signoff_groups from todolists.utils import get_annotated_todolists from .utils import get_annotated_maintainers @@ -48,6 +49,9 @@ def index(request): todolists = get_annotated_todolists() todolists = [todolist for todolist in todolists if todolist.incomplete_count > 0] + signoffs = sorted(get_signoff_groups(user=request.user), + key=operator.attrgetter('pkgbase')) + maintainers = get_annotated_maintainers() maintained = PackageRelation.objects.filter( @@ -70,6 +74,7 @@ def index(request): 'orphan': orphan, 'flagged' : flagged, 'todopkgs' : todopkgs, + 'signoffs': signoffs } return direct_to_template(request, 'devel/index.html', page_dict) -- cgit v1.2.3-2-g168b From 0344f8ad564644c50203985255fab1d053aed463 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 15 Nov 2011 15:04:33 -0600 Subject: Add ability to cache users by username on the UserFinder This is very useful in the signoff message population script where we are very likely to encounter the same users over and over. Signed-off-by: Dan McGee --- devel/utils.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'devel') diff --git a/devel/utils.py b/devel/utils.py index d7a154a8..62b12cd5 100644 --- a/devel/utils.py +++ b/devel/utils.py @@ -47,6 +47,7 @@ SELECT pr.user_id, COUNT(*), COUNT(p.flag_date) class UserFinder(object): def __init__(self): self.cache = {} + self.username_cache = {} @staticmethod def user_email(name, email): @@ -111,7 +112,22 @@ class UserFinder(object): self.cache[userstring] = user return user + def find_by_username(self, username): + if not username: + return None + if username in self.username_cache: + return self.username_cache[username] + + try: + user = User.objects.get(username=username) + except User.DoesNotExist: + user = None + + self.username_cache[username] = user + return user + def clear_cache(self): self.cache = {} + self.username_cache = {} # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From 9550236a87fc65827e994bea108350a43d3f161f Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 15 Nov 2011 21:26:57 -0600 Subject: Improve primary arch validation Ensure we can accept either a Arch object or an architecture name when passed to read_repo() by moving the validation there and being a bit more careful about typechecking and object lookup. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index a8e3219e..b4966834 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -51,8 +51,6 @@ class Command(BaseCommand): def handle(self, arch=None, filename=None, **options): if not arch: raise CommandError('Architecture is required.') - if not validate_arch(arch): - raise CommandError('Specified architecture %s is not currently known.' % arch) if not filename: raise CommandError('Package database file is required.') filename = os.path.normpath(filename) @@ -464,29 +462,38 @@ def parse_repo(repopath): logger.info("Finished repo parsing, %d total packages", len(pkgs)) return (reponame, pkgs.values()) -def validate_arch(archname): +def locate_arch(arch): "Check if arch is valid." - return Arch.objects.filter(name__iexact=archname).exists() + if isinstance(arch, Arch): + return arch + try: + return Arch.objects.get(name__iexact=arch) + except Arch.DoesNotExist: + raise CommandError( + 'Specified architecture %s is not currently known.' % arch) + def read_repo(primary_arch, repo_file, options): """ Parses repo.db.tar.gz file and returns exit status. """ + # always returns an Arch object, regardless of what is passed in + primary_arch = locate_arch(primary_arch) + repo, packages = parse_repo(repo_file) # group packages by arch -- to handle noarch stuff packages_arches = {} for arch in Arch.objects.filter(agnostic=True): packages_arches[arch.name] = [] - packages_arches[primary_arch] = [] + packages_arches[primary_arch.name] = [] for package in packages: if package.arch in packages_arches: packages_arches[package.arch].append(package) else: # we don't include mis-arched packages - logger.warning("Package %s arch = %s", - package.name, package.arch) + logger.warning("Package %s arch = %s", package.name, package.arch) del packages logger.info('Starting database updates.') -- cgit v1.2.3-2-g168b From 2a2df0074e39a797a0a4b5f7db7cfc9097301328 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 15 Nov 2011 21:28:42 -0600 Subject: Add new reporead_inotify management command This is the new on-the-fly updates hotness. Rather than continue to schedule reporead to run once an hour in cron or however else you ran it, this command can be run once and left running, and will automagically pick up on any database file changes and run an import. It operates on the files databases only; this will keep both the packages and files always in sync and remove the delay in updating, especially helpful for new testing packages. Signed-off-by: Dan McGee --- devel/management/commands/reporead_inotify.py | 188 ++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100755 devel/management/commands/reporead_inotify.py (limited to 'devel') diff --git a/devel/management/commands/reporead_inotify.py b/devel/management/commands/reporead_inotify.py new file mode 100755 index 00000000..135c0367 --- /dev/null +++ b/devel/management/commands/reporead_inotify.py @@ -0,0 +1,188 @@ +# -*- coding: utf-8 -*- +""" +reporead_inotify command + +Watches repo.files.tar.gz files for updates and parses them after a short delay +in order to catch all updates in a single bulk update. + +Usage: ./manage.py reporead_inotify [path_template] + +Where 'path_template' is an optional path_template for finding the +repo.files.tar.gz files. The form is '/srv/ftp/%(repo)s/os/%(arch)s/', which is +also the default template if none is specified. While 'repo' is not required to +be present in the path_template, note that 'arch' is so reporead can function +correctly. +""" + +import logging +import os.path +import pyinotify +import sys +import threading +import time + +from django.core.management.base import BaseCommand, CommandError + +from main.models import Arch, Repo +from .reporead import read_repo + +logging.basicConfig( + level=logging.WARNING, + format='%(asctime)s -> %(levelname)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + stream=sys.stderr) +logger = logging.getLogger() + +class Command(BaseCommand): + help = "Watch database files and run an update when necessary." + args = "[path_template]" + + def handle(self, path_template=None, **options): + v = int(options.get('verbosity', 0)) + if v == 0: + logger.level = logging.ERROR + elif v == 1: + logger.level = logging.INFO + elif v == 2: + logger.level = logging.DEBUG + + if not path_template: + path_template = '/srv/ftp/%(repo)s/os/%(arch)s/' + self.path_template = path_template + + notifier = self.setup_notifier() + logger.info('Entering notifier loop') + notifier.loop() + + def setup_notifier(self): + '''Set up and configure the inotify machinery and logic. + This takes the provided or default path_template and builds a list of + directories we need to watch for database updates. It then validates + and passes these on to the various pyinotify pieces as necessary and + finally builds and returns a notifier object.''' + arches = Arch.objects.filter(agnostic=False) + repos = Repo.objects.all() + arch_path_map = dict((arch, None) for arch in arches) + all_paths = set() + total_paths = 0 + for arch in arches: + combos = ({ 'repo': repo.name.lower(), 'arch': arch.name } + for repo in repos) + # take a python format string and generate all unique combinations + # of directories from it; using set() ensures we filter it down + paths = set(self.path_template % values for values in combos) + total_paths += len(paths) + all_paths |= paths + arch_path_map[arch] = paths + + logger.info('Watching %d total paths', total_paths) + logger.debug(all_paths) + + # sanity check- basically ensure every path we created from the + # template mapped to only one architecture + if total_paths != len(all_paths): + raise CommandError('path template did not uniquely ' + 'determine architecture for each file') + + # A proper atomic replacement of the database as done by rsync is type + # IN_MOVED_TO. repo-add/remove will finish with a IN_CLOSE_WRITE. + mask = pyinotify.IN_CLOSE_WRITE | pyinotify.IN_MOVED_TO + + manager = pyinotify.WatchManager() + for name in all_paths: + manager.add_watch(name, mask) + + handler = EventHandler(arch_paths=arch_path_map) + return pyinotify.Notifier(manager, handler) + + +class Database(object): + '''A object representing a pacman database on the filesystem. It stores + various bits of metadata and state representing the file path, when we last + updated, how long our delay is before performing the update, whether we are + updating now, etc.''' + def __init__(self, arch, path, delay=60.0): + self.arch = arch + self.path = path + self.delay = delay + self.mtime = None + self.last_import = None + self.update_thread = None + self.updating = False + self.run_again = False + self.lock = threading.Lock() + + def _start_update_countdown(self): + self.update_thread = threading.Timer(self.delay, self.update) + logger.info('Starting %.1f second countdown to update %s', + self.delay, self.path) + self.update_thread.start() + + def queue_for_update(self, mtime): + logger.debug('Queueing database %s...', self.path) + with self.lock: + self.mtime = mtime + if self.updating: + # store the fact that we will need to run it again + self.run_again = True + return + if self.update_thread: + self.update_thread.cancel() + self._start_update_countdown() + + def update(self): + logger.debug('Updating database %s...', self.path) + with self.lock: + self.last_import = time.time() + self.updating = True + + try: + # invoke reporead's primary method + read_repo(self.arch, self.path, {}) + finally: + logger.debug('Done updating database %s.', self.path) + with self.lock: + self.update_thread = None + self.updating = False + if self.run_again: + self.run_again = False + self._start_update_countdown() + + +class EventHandler(pyinotify.ProcessEvent): + '''Our main event handler which listens for database change events. Because + we are watching the whole directory, we filter down and only look at those + events dealing with files databases.''' + + def my_init(self, **kwargs): + self.databases = {} + self.arch_lookup = {} + + # we really want a single path to arch mapping, so massage the data + arch_paths = kwargs['arch_paths'] + for arch, paths in arch_paths.items(): + self.arch_lookup.update((path.rstrip('/'), arch) for path in paths) + + def process_default(self, event): + '''Primary event processing function which kicks off reporead timer + threads if a files database was updated.''' + if not event.name: + return + # screen to only the files we care about + if event.name.endswith('.files.tar.gz'): + path = event.pathname + stat = os.stat(path) + database = self.databases.get(path, None) + if database is None: + arch = self.arch_lookup.get(event.path, None) + if arch is None: + logger.warning( + 'Could not determine arch for %s, skipping update', + path) + return + database = Database(arch, path) + self.databases[path] = database + database.queue_for_update(stat.st_mtime) + + +# vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From d3b36e1ce992a8b70f4fe8fe9e8df74e835fb865 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 15 Nov 2011 22:03:01 -0600 Subject: reporead_inotify: cancel threads that haven't started yet on shutdown Signed-off-by: Dan McGee --- devel/management/commands/reporead_inotify.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'devel') diff --git a/devel/management/commands/reporead_inotify.py b/devel/management/commands/reporead_inotify.py index 135c0367..4c865ce1 100755 --- a/devel/management/commands/reporead_inotify.py +++ b/devel/management/commands/reporead_inotify.py @@ -54,6 +54,11 @@ class Command(BaseCommand): logger.info('Entering notifier loop') notifier.loop() + logger.info('Cancelling remaining threads...') + for thread in threading.enumerate(): + if hasattr(thread, 'cancel'): + thread.cancel() + def setup_notifier(self): '''Set up and configure the inotify machinery and logic. This takes the provided or default path_template and builds a list of -- cgit v1.2.3-2-g168b From c00e7e84045613ee2aa80f66b9972db971ab3f26 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 15 Nov 2011 23:43:42 -0600 Subject: reporead: clean up some debug logging Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index b4966834..45229524 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -36,6 +36,8 @@ logging.basicConfig( format='%(asctime)s -> %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S', stream=sys.stderr) +TRACE = 5 +logging.addLevelName(TRACE, 'TRACE') logger = logging.getLogger() class Command(BaseCommand): @@ -368,7 +370,7 @@ def db_update(archname, reponame, pkgs, options): # packages in both database and in syncdb (update in database) pkg_in_both = syncset & dbset for p in [x for x in pkgs if x.name in pkg_in_both]: - logger.debug("Looking for package updates") + logger.debug("Checking package %s", p.name) dbp = dbdict[p.name] timestamp = None # for a force, we don't want to update the timestamp. @@ -406,7 +408,7 @@ def parse_info(iofile): continue elif line.startswith('%') and line.endswith('%'): blockname = line[1:-1].lower() - logger.debug("Parsing package block %s", blockname) + logger.log(TRACE, "Parsing package block %s", blockname) store[blockname] = [] elif blockname: store[blockname].append(line) @@ -456,7 +458,7 @@ def parse_repo(repopath): tarinfo.name) data_file.close() - logger.debug("Done parsing file %s", fname) + logger.debug("Done parsing file %s/%s", pkgid, fname) repodb.close() logger.info("Finished repo parsing, %d total packages", len(pkgs)) -- cgit v1.2.3-2-g168b From 404c4b400b2bd2a14e0363e33d66505c51903fe7 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 16 Nov 2011 12:48:36 -0600 Subject: reporead: a few small tweaks Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 45229524..ad76db4d 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -450,13 +450,14 @@ def parse_repo(repopath): continue data_file = repodb.extractfile(tarinfo) data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), - encoding='utf=8') + encoding='UTF-8') try: pkgs[pkgid].populate(parse_info(data_file)) except UnicodeDecodeError: logger.warn("Could not correctly decode %s, skipping file", tarinfo.name) data_file.close() + del data_file logger.debug("Done parsing file %s/%s", pkgid, fname) @@ -498,10 +499,10 @@ def read_repo(primary_arch, repo_file, options): logger.warning("Package %s arch = %s", package.name, package.arch) del packages - logger.info('Starting database updates.') + logger.info('Starting database updates for %s.', repo_file) for arch in sorted(packages_arches.keys()): db_update(arch, repo, packages_arches[arch], options) - logger.info('Finished database updates.') + logger.info('Finished database updates for %s.', repo_file) return 0 # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From b1e406d73844d5a30344ca8ac855fe850c52bc2f Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 16 Nov 2011 12:49:17 -0600 Subject: reporead_inotify: spin up read_repo() in separate thread This prevents memory usage from ballooning to absolutely huge values, such as when multiple threads kick off at the same time. The bulk of our memory allocation obviously comes in these threads and not the main threads, so being able to isolate them in processes helps a lot. Signed-off-by: Dan McGee --- devel/management/commands/reporead_inotify.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead_inotify.py b/devel/management/commands/reporead_inotify.py index 4c865ce1..ffd49b8f 100755 --- a/devel/management/commands/reporead_inotify.py +++ b/devel/management/commands/reporead_inotify.py @@ -15,6 +15,7 @@ correctly. """ import logging +import multiprocessing import os.path import pyinotify import sys @@ -133,6 +134,7 @@ class Database(object): return if self.update_thread: self.update_thread.cancel() + self.update_thread = None self._start_update_countdown() def update(self): @@ -142,8 +144,13 @@ class Database(object): self.updating = True try: - # invoke reporead's primary method - read_repo(self.arch, self.path, {}) + # invoke reporead's primary method. we do this in a separate + # process for memory conservation purposes; these processes grow + # rather large so it is best to free up the memory ASAP. + process = multiprocessing.Process(target=read_repo, + args=[self.arch, self.path, {}]) + process.start() + process.join() finally: logger.debug('Done updating database %s.', self.path) with self.lock: -- cgit v1.2.3-2-g168b From aa20c798ca8af365b2549591700e932a74d068b8 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 16 Nov 2011 13:02:35 -0600 Subject: reporead_inotify: close connection once we are done with it This prevents an otherwise idle connection from sitting around and being totally useless. Signed-off-by: Dan McGee --- devel/management/commands/reporead_inotify.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'devel') diff --git a/devel/management/commands/reporead_inotify.py b/devel/management/commands/reporead_inotify.py index ffd49b8f..acb53a54 100755 --- a/devel/management/commands/reporead_inotify.py +++ b/devel/management/commands/reporead_inotify.py @@ -23,6 +23,7 @@ import threading import time from django.core.management.base import BaseCommand, CommandError +from django.db import connection from main.models import Arch, Repo from .reporead import read_repo @@ -90,6 +91,11 @@ class Command(BaseCommand): raise CommandError('path template did not uniquely ' 'determine architecture for each file') + # this thread is done using the database; all future access is done in + # the spawned read_repo() processes, so close the otherwise completely + # idle connection. + connection.close() + # A proper atomic replacement of the database as done by rsync is type # IN_MOVED_TO. repo-add/remove will finish with a IN_CLOSE_WRITE. mask = pyinotify.IN_CLOSE_WRITE | pyinotify.IN_MOVED_TO -- cgit v1.2.3-2-g168b From 9d2fdbe5bc6a0d9ab2907b377056851fc5eb56c3 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 17 Nov 2011 10:17:55 -0600 Subject: reporead_inotify: nice the spawned subprocesses This prevents the reporead job from taking over time from more important processes; this is not a rush task. Signed-off-by: Dan McGee --- devel/management/commands/reporead_inotify.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead_inotify.py b/devel/management/commands/reporead_inotify.py index acb53a54..c74762eb 100755 --- a/devel/management/commands/reporead_inotify.py +++ b/devel/management/commands/reporead_inotify.py @@ -16,7 +16,7 @@ correctly. import logging import multiprocessing -import os.path +import os import pyinotify import sys import threading @@ -113,10 +113,11 @@ class Database(object): various bits of metadata and state representing the file path, when we last updated, how long our delay is before performing the update, whether we are updating now, etc.''' - def __init__(self, arch, path, delay=60.0): + def __init__(self, arch, path, delay=60.0, nice=3): self.arch = arch self.path = path self.delay = delay + self.nice = nice self.mtime = None self.last_import = None self.update_thread = None @@ -153,8 +154,12 @@ class Database(object): # invoke reporead's primary method. we do this in a separate # process for memory conservation purposes; these processes grow # rather large so it is best to free up the memory ASAP. - process = multiprocessing.Process(target=read_repo, - args=[self.arch, self.path, {}]) + def run(): + if self.nice != 0: + os.nice(self.nice) + read_repo(self.arch, self.path, {}) + + process = multiprocessing.Process(target=run) process.start() process.join() finally: -- cgit v1.2.3-2-g168b From a9819e3d715ce3e5c20c9665db9a6100f06ab562 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 17 Nov 2011 12:34:12 -0600 Subject: Ensure reporead is protected against simultaneous runs This adds a bunch of transaction magic and SELECT FOR UPDATE stuff to reporead to cope with the now-concurrent runs of reporead we get when invoked from our inotify-based updater. The collision occurs with 'any' architecture packages as both repo databases contain the new version, and the updates occur at exactly the same time. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 206 +++++++++++++++++----------------- 1 file changed, 106 insertions(+), 100 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index ad76db4d..b6bd8457 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -13,10 +13,6 @@ Example: ./manage.py reporead i686 /tmp/core.db.tar.gz """ -from django.core.management.base import BaseCommand, CommandError -from django.contrib.auth.models import User -from django.db import transaction - from collections import defaultdict import io import os @@ -27,6 +23,11 @@ import logging from datetime import datetime from optparse import make_option +from django.core.management.base import BaseCommand, CommandError +from django.contrib.auth.models import User +from django.db import connections, router, transaction +from django.db.utils import IntegrityError + from devel.utils import UserFinder from main.models import Arch, Package, PackageDepend, PackageFile, Repo from packages.models import Conflict, Provision, Replacement @@ -189,8 +190,6 @@ def create_multivalued(dbpkg, repopkg, db_attr, repo_attr): finder = UserFinder() def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): - db_score = 1 - if repopkg.base: dbpkg.pkgbase = repopkg.base else: @@ -214,7 +213,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.last_update = timestamp dbpkg.save() - db_score += populate_files(dbpkg, repopkg, force=force) + populate_files(dbpkg, repopkg, force=force) dbpkg.packagedepend_set.all().delete() for y in repopkg.depends: @@ -235,28 +234,23 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): create_multivalued(dbpkg, repopkg, 'groups', 'groups') create_multivalued(dbpkg, repopkg, 'licenses', 'license') - related_score = (len(repopkg.depends) + len(repopkg.optdepends) - + len(repopkg.conflicts) + len(repopkg.provides) - + len(repopkg.replaces) + len(repopkg.groups) - + len(repopkg.license)) - if related_score: - db_score += (related_score / 20) + 1 - return db_score +pkg_same_version = lambda pkg, dbpkg: pkg.ver == dbpkg.pkgver \ + and pkg.rel == dbpkg.pkgrel and pkg.epoch == dbpkg.epoch def populate_files(dbpkg, repopkg, force=False): if not force: - if dbpkg.pkgver != repopkg.ver or dbpkg.pkgrel != repopkg.rel \ - or dbpkg.epoch != repopkg.epoch: + if not pkg_same_version(repopkg, dbpkg): logger.info("DB version (%s) didn't match repo version " "(%s) for package %s, skipping file list addition", dbpkg.full_version, repopkg.full_version, dbpkg.pkgname) - return 0 + return if not dbpkg.files_last_update or not dbpkg.last_update: pass elif dbpkg.files_last_update > dbpkg.last_update: - return 0 + return + # only delete files if we are reading a DB that contains them if repopkg.has_files: dbpkg.packagefile_set.all().delete() @@ -275,30 +269,19 @@ def populate_files(dbpkg, repopkg, force=False): pkgfile.save(force_insert=True) dbpkg.files_last_update = datetime.utcnow() dbpkg.save() - return (len(repopkg.files) / 50) + 1 - return 0 - -class Batcher(object): - def __init__(self, threshold, start=0): - self.threshold = threshold - self.meter = start - def batch_commit(self, score): - """ - Track updates to the database and perform a commit if the batch - becomes sufficiently large. "Large" is defined by waiting for the - sum of scores to exceed the arbitrary threshold value; once it is - hit a commit is issued. - """ - self.meter += score - if self.meter > self.threshold: - logger.debug("Committing transaction, batch threshold hit") - transaction.commit() - self.meter = 0 +def select_pkg_for_update(dbpkg): + database = router.db_for_write(Package, instance=dbpkg) + connection = connections[database] + if 'sqlite' in connection.settings_dict['ENGINE'].lower(): + return dbpkg + new_pkg = Package.objects.raw( + 'SELECT * FROM packages WHERE id = %s FOR UPDATE', + [dbpkg.id]) + return list(new_pkg)[0] -@transaction.commit_on_success def db_update(archname, reponame, pkgs, options): """ Parses a list and updates the Arch dev database accordingly. @@ -310,88 +293,111 @@ def db_update(archname, reponame, pkgs, options): logger.info('Updating Arch: %s', archname) force = options.get('force', False) filesonly = options.get('filesonly', False) - repository = Repo.objects.get(name__iexact=reponame) - architecture = Arch.objects.get(name__iexact=archname) - # no-arg order_by() removes even the default ordering; we don't need it - dbpkgs = Package.objects.filter( - arch=architecture, repo=repository).order_by() - # This makes our inner loop where we find packages by name *way* more - # efficient by not having to go to the database for each package to - # SELECT them by name. - dbdict = dict([(pkg.pkgname, pkg) for pkg in dbpkgs]) - - logger.debug("Creating sets") - dbset = set(dbdict.keys()) - syncset = set([pkg.name for pkg in pkgs]) - logger.info("%d packages in current web DB", len(dbset)) - logger.info("%d packages in new updating db", len(syncset)) - in_sync_not_db = syncset - dbset - logger.info("%d packages in sync not db", len(in_sync_not_db)) - - # Try to catch those random package deletions that make Eric so unhappy. - if len(dbset): - dbpercent = 100.0 * len(syncset) / len(dbset) - else: - dbpercent = 0.0 - logger.info("DB package ratio: %.1f%%", dbpercent) - - # Fewer than 20 packages makes the percentage check unreliable, but it also - # means we expect the repo to fluctuate a lot. - msg = "Package database has %.1f%% the number of packages in the " \ - "web database" % dbpercent - if len(dbset) == 0 and len(syncset) == 0: - pass - elif not filesonly and \ - len(dbset) > 20 and dbpercent < 50.0 and \ - not repository.testing and not repository.staging: - logger.error(msg) - raise Exception(msg) - elif dbpercent < 75.0: - logger.warning(msg) - - batcher = Batcher(100) + + with transaction.commit_manually(): + repository = Repo.objects.get(name__iexact=reponame) + architecture = Arch.objects.get(name__iexact=archname) + # no-arg order_by() removes even the default ordering; we don't need it + dbpkgs = Package.objects.filter( + arch=architecture, repo=repository).order_by() + # This makes our inner loop where we find packages by name *way* more + # efficient by not having to go to the database for each package to + # SELECT them by name. + dbdict = dict((dbpkg.pkgname, dbpkg) for dbpkg in dbpkgs) + + logger.debug("Creating sets") + dbset = set(dbdict.keys()) + syncset = set([pkg.name for pkg in pkgs]) + logger.info("%d packages in current web DB", len(dbset)) + logger.info("%d packages in new updating db", len(syncset)) + in_sync_not_db = syncset - dbset + logger.info("%d packages in sync not db", len(in_sync_not_db)) + + # Try to catch those random package deletions that make Eric so unhappy. + if len(dbset): + dbpercent = 100.0 * len(syncset) / len(dbset) + else: + dbpercent = 0.0 + logger.info("DB package ratio: %.1f%%", dbpercent) + + # Fewer than 20 packages makes the percentage check unreliable, but it also + # means we expect the repo to fluctuate a lot. + msg = "Package database has %.1f%% the number of packages in the " \ + "web database" % dbpercent + if len(dbset) == 0 and len(syncset) == 0: + pass + elif not filesonly and \ + len(dbset) > 20 and dbpercent < 50.0 and \ + not repository.testing and not repository.staging: + logger.error(msg) + raise Exception(msg) + elif dbpercent < 75.0: + logger.warning(msg) + + # If isolation level is repeatable-read, we need to ensure each package + # update starts a new transaction and re-queries the database as necessary + # to guard against simultaneous updates + transaction.commit() if not filesonly: # packages in syncdb and not in database (add to database) - for p in [x for x in pkgs if x.name in in_sync_not_db]: - logger.info("Adding package %s", p.name) - pkg = Package(pkgname=p.name, arch=architecture, repo=repository) - score = populate_pkg(pkg, p, timestamp=datetime.utcnow()) - batcher.batch_commit(score) + for pkg in (pkg for pkg in pkgs if pkg.name in in_sync_not_db): + logger.info("Adding package %s", pkg.name) + dbpkg = Package(pkgname=pkg.name, arch=architecture, repo=repository) + try: + with transaction.commit_on_success(): + populate_pkg(dbpkg, pkg, timestamp=datetime.utcnow()) + except IntegrityError: + logger.warning("Could not add package %s; " + "not fatal if another thread beat us to it.", + pkg.name, exc_info=True) # packages in database and not in syncdb (remove from database) - in_db_not_sync = dbset - syncset - for p in in_db_not_sync: - logger.info("Removing package %s", p) - dbp = dbdict[p] - dbp.delete() - batcher.batch_commit(1) + for pkgname in (dbset - syncset): + logger.info("Removing package %s", pkgname) + dbpkg = dbdict[pkgname] + with transaction.commit_on_success(): + # no race condition here as long as simultaneous threads both + # issue deletes; second delete will be a no-op + dbpkg.delete() # packages in both database and in syncdb (update in database) pkg_in_both = syncset & dbset - for p in [x for x in pkgs if x.name in pkg_in_both]: - logger.debug("Checking package %s", p.name) - dbp = dbdict[p.name] + for pkg in (x for x in pkgs if x.name in pkg_in_both): + logger.debug("Checking package %s", pkg.name) + dbpkg = dbdict[pkg.name] timestamp = None # for a force, we don't want to update the timestamp. # for a non-force, we don't want to do anything at all. if filesonly: pass - elif p.ver == dbp.pkgver and p.rel == dbp.pkgrel \ - and p.epoch == dbp.epoch: + elif pkg_same_version(pkg, dbpkg): if not force: continue else: timestamp = datetime.utcnow() + # The odd select_for_update song and dance here are to ensure + # simultaneous updates don't happen on a package, causing + # files/depends/all related items to be double-imported. if filesonly: - logger.debug("Checking files for package %s", p.name) - score = populate_files(dbp, p, force=force) + with transaction.commit_on_success(): + # TODO Django 1.4 select_for_update() will work once released + dbpkg = select_pkg_for_update(dbpkg) + if pkg_same_version(pkg, dbpkg): + logger.debug("Package %s was already updated", pkg.name) + continue + logger.debug("Checking files for package %s", pkg.name) + populate_files(dbpkg, pkg, force=force) else: - logger.info("Updating package %s", p.name) - score = populate_pkg(dbp, p, force=force, timestamp=timestamp) - - batcher.batch_commit(score) + with transaction.commit_on_success(): + # TODO Django 1.4 select_for_update() will work once released + dbpkg = select_pkg_for_update(dbpkg) + if pkg_same_version(pkg, dbpkg): + logger.debug("Package %s was already updated", pkg.name) + continue + logger.info("Updating package %s", pkg.name) + populate_pkg(dbpkg, pkg, force=force, timestamp=timestamp) logger.info('Finished updating Arch: %s', archname) -- cgit v1.2.3-2-g168b From 2cb4f97bb235217d6e56deded1444f5e84f08b71 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 17 Nov 2011 13:36:27 -0600 Subject: reporead: don't trim pkgdesc length Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index b6bd8457..cf101d97 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -73,7 +73,7 @@ class Command(BaseCommand): class Pkg(object): """An interim 'container' object for holding Arch package data.""" - bare = ( 'name', 'base', 'arch', 'desc', 'filename', + bare = ( 'name', 'base', 'arch', 'filename', 'md5sum', 'sha256sum', 'url', 'packager' ) number = ( 'csize', 'isize' ) collections = ( 'depends', 'optdepends', 'conflicts', @@ -101,8 +101,8 @@ class Pkg(object): setattr(self, k, v[0][:254]) elif k in self.number: setattr(self, k, long(v[0])) - elif k == 'pgpsig': - # do NOT prune this value at all + elif k in ('desc', 'pgpsig'): + # do NOT prune these values at all setattr(self, k, v[0]) elif k == 'version': match = self.version_re.match(v[0]) -- cgit v1.2.3-2-g168b From 85657db05d7f65604340699cfcb9967c9e81a0ef Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Mon, 21 Nov 2011 10:08:23 -0600 Subject: Better support for non-latin full names Add a 'latin_name' field to the user profile so we can better support those developers with names in non-Latin scripts, and yet still show a Latin name as necessary on the developer profile page. This field only shows up if populated. Also, use consistent sorting everywhere- rather than using username, always use first_name and last_name fields. Signed-off-by: Dan McGee --- devel/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'devel') diff --git a/devel/views.py b/devel/views.py index 0002df04..08b19cd7 100644 --- a/devel/views.py +++ b/devel/views.py @@ -83,7 +83,7 @@ def index(request): @never_cache def clock(request): devs = User.objects.filter(is_active=True).order_by( - 'username').select_related('userprofile') + 'first_name', 'last_name').select_related('userprofile') now = datetime.now() utc_now = datetime.utcnow().replace(tzinfo=pytz.utc) -- cgit v1.2.3-2-g168b From ac34e358103ee718369692b9ba5afe6830a1df92 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 30 Nov 2011 12:25:54 -0600 Subject: reporead: fix filesonly needs update checks This was broken after the select for update changes. We really should split the whole filesonly update into another method instead of the current shotgun approach with conditionals everywhere. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index cf101d97..f8cc2034 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -382,11 +382,13 @@ def db_update(archname, reponame, pkgs, options): # files/depends/all related items to be double-imported. if filesonly: with transaction.commit_on_success(): + if not dbpkg.files_last_update or not dbpkg.last_update: + pass + elif dbpkg.files_last_update > dbpkg.last_update: + logger.debug("Files for %s are up to date", pkg.name) + continue # TODO Django 1.4 select_for_update() will work once released dbpkg = select_pkg_for_update(dbpkg) - if pkg_same_version(pkg, dbpkg): - logger.debug("Package %s was already updated", pkg.name) - continue logger.debug("Checking files for package %s", pkg.name) populate_files(dbpkg, pkg, force=force) else: -- cgit v1.2.3-2-g168b From dca9fd2ea687b8d20fd5c39c1449846ddef1e3c2 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 30 Nov 2011 13:04:15 -0600 Subject: reporead: split out filesonly update method This removes a bunch of the conditional logic at a slight cost of some code duplication. However, the methods and madness is now much easier to follow. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 170 +++++++++++++++++++--------------- 1 file changed, 95 insertions(+), 75 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index f8cc2034..e4ba8580 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -282,40 +282,20 @@ def select_pkg_for_update(dbpkg): return list(new_pkg)[0] -def db_update(archname, reponame, pkgs, options): - """ - Parses a list and updates the Arch dev database accordingly. - - Arguments: - pkgs -- A list of Pkg objects. - - """ - logger.info('Updating Arch: %s', archname) - force = options.get('force', False) - filesonly = options.get('filesonly', False) - +def update_common(archname, reponame, pkgs, sanity_check=True): with transaction.commit_manually(): repository = Repo.objects.get(name__iexact=reponame) architecture = Arch.objects.get(name__iexact=archname) # no-arg order_by() removes even the default ordering; we don't need it dbpkgs = Package.objects.filter( arch=architecture, repo=repository).order_by() - # This makes our inner loop where we find packages by name *way* more - # efficient by not having to go to the database for each package to - # SELECT them by name. - dbdict = dict((dbpkg.pkgname, dbpkg) for dbpkg in dbpkgs) - - logger.debug("Creating sets") - dbset = set(dbdict.keys()) - syncset = set([pkg.name for pkg in pkgs]) - logger.info("%d packages in current web DB", len(dbset)) - logger.info("%d packages in new updating db", len(syncset)) - in_sync_not_db = syncset - dbset - logger.info("%d packages in sync not db", len(in_sync_not_db)) + + logger.info("%d packages in current web DB", len(dbpkgs)) + logger.info("%d packages in new updating DB", len(pkgs)) # Try to catch those random package deletions that make Eric so unhappy. - if len(dbset): - dbpercent = 100.0 * len(syncset) / len(dbset) + if len(dbpkgs): + dbpercent = 100.0 * len(pkgs) / len(dbpkgs) else: dbpercent = 0.0 logger.info("DB package ratio: %.1f%%", dbpercent) @@ -324,11 +304,13 @@ def db_update(archname, reponame, pkgs, options): # means we expect the repo to fluctuate a lot. msg = "Package database has %.1f%% the number of packages in the " \ "web database" % dbpercent - if len(dbset) == 0 and len(syncset) == 0: + if not sanity_check: + pass + elif repository.testing or repository.staging: pass - elif not filesonly and \ - len(dbset) > 20 and dbpercent < 50.0 and \ - not repository.testing and not repository.staging: + elif len(dbpkgs) == 0 and len(pkgs) == 0: + pass + elif len(dbpkgs) > 20 and dbpercent < 50.0: logger.error(msg) raise Exception(msg) elif dbpercent < 75.0: @@ -339,27 +321,45 @@ def db_update(archname, reponame, pkgs, options): # to guard against simultaneous updates transaction.commit() - if not filesonly: - # packages in syncdb and not in database (add to database) - for pkg in (pkg for pkg in pkgs if pkg.name in in_sync_not_db): - logger.info("Adding package %s", pkg.name) - dbpkg = Package(pkgname=pkg.name, arch=architecture, repo=repository) - try: - with transaction.commit_on_success(): - populate_pkg(dbpkg, pkg, timestamp=datetime.utcnow()) - except IntegrityError: - logger.warning("Could not add package %s; " - "not fatal if another thread beat us to it.", - pkg.name, exc_info=True) - - # packages in database and not in syncdb (remove from database) - for pkgname in (dbset - syncset): - logger.info("Removing package %s", pkgname) - dbpkg = dbdict[pkgname] + return dbpkgs + +def db_update(archname, reponame, pkgs, force=False): + """ + Parses a list of packages and updates the packages database accordingly. + """ + logger.info('Updating %s (%s)', reponame, archname) + dbpkgs = update_common(archname, reponame, pkgs, True) + + # This makes our inner loop where we find packages by name *way* more + # efficient by not having to go to the database for each package to + # SELECT them by name. + dbdict = dict((dbpkg.pkgname, dbpkg) for dbpkg in dbpkgs) + + dbset = set(dbdict.keys()) + syncset = set([pkg.name for pkg in pkgs]) + + in_sync_not_db = syncset - dbset + logger.info("%d packages in sync not db", len(in_sync_not_db)) + # packages in syncdb and not in database (add to database) + for pkg in (pkg for pkg in pkgs if pkg.name in in_sync_not_db): + logger.info("Adding package %s", pkg.name) + dbpkg = Package(pkgname=pkg.name, arch=architecture, repo=repository) + try: with transaction.commit_on_success(): - # no race condition here as long as simultaneous threads both - # issue deletes; second delete will be a no-op - dbpkg.delete() + populate_pkg(dbpkg, pkg, timestamp=datetime.utcnow()) + except IntegrityError: + logger.warning("Could not add package %s; " + "not fatal if another thread beat us to it.", + pkg.name, exc_info=True) + + # packages in database and not in syncdb (remove from database) + for pkgname in (dbset - syncset): + logger.info("Removing package %s", pkgname) + dbpkg = dbdict[pkgname] + with transaction.commit_on_success(): + # no race condition here as long as simultaneous threads both + # issue deletes; second delete will be a no-op + dbpkg.delete() # packages in both database and in syncdb (update in database) pkg_in_both = syncset & dbset @@ -369,9 +369,7 @@ def db_update(archname, reponame, pkgs, options): timestamp = None # for a force, we don't want to update the timestamp. # for a non-force, we don't want to do anything at all. - if filesonly: - pass - elif pkg_same_version(pkg, dbpkg): + if pkg_same_version(pkg, dbpkg): if not force: continue else: @@ -380,28 +378,45 @@ def db_update(archname, reponame, pkgs, options): # The odd select_for_update song and dance here are to ensure # simultaneous updates don't happen on a package, causing # files/depends/all related items to be double-imported. - if filesonly: - with transaction.commit_on_success(): - if not dbpkg.files_last_update or not dbpkg.last_update: - pass - elif dbpkg.files_last_update > dbpkg.last_update: - logger.debug("Files for %s are up to date", pkg.name) - continue - # TODO Django 1.4 select_for_update() will work once released - dbpkg = select_pkg_for_update(dbpkg) - logger.debug("Checking files for package %s", pkg.name) - populate_files(dbpkg, pkg, force=force) - else: - with transaction.commit_on_success(): - # TODO Django 1.4 select_for_update() will work once released - dbpkg = select_pkg_for_update(dbpkg) - if pkg_same_version(pkg, dbpkg): - logger.debug("Package %s was already updated", pkg.name) - continue - logger.info("Updating package %s", pkg.name) - populate_pkg(dbpkg, pkg, force=force, timestamp=timestamp) + with transaction.commit_on_success(): + # TODO Django 1.4 select_for_update() will work once released + dbpkg = select_pkg_for_update(dbpkg) + if pkg_same_version(pkg, dbpkg): + logger.debug("Package %s was already updated", pkg.name) + continue + logger.info("Updating package %s", pkg.name) + populate_pkg(dbpkg, pkg, force=force, timestamp=timestamp) + + logger.info('Finished updating arch: %s', archname) + + +def filesonly_update(archname, reponame, pkgs, force=False): + """ + Parses a list of packages and updates the packages database accordingly. + """ + logger.info('Updating files for %s (%s)', reponame, archname) + dbpkgs = update_common(archname, reponame, pkgs, False) + dbdict = dict((dbpkg.pkgname, dbpkg) for dbpkg in dbpkgs) + dbset = set(dbdict.keys()) + + for pkg in (pkg for pkg in pkgs if pkg.name in dbset): + dbpkg = dbdict[pkg.name] + + # The odd select_for_update song and dance here are to ensure + # simultaneous updates don't happen on a package, causing + # files to be double-imported. + with transaction.commit_on_success(): + if not dbpkg.files_last_update or not dbpkg.last_update: + pass + elif dbpkg.files_last_update > dbpkg.last_update: + logger.debug("Files for %s are up to date", pkg.name) + continue + # TODO Django 1.4 select_for_update() will work once released + dbpkg = select_pkg_for_update(dbpkg) + logger.debug("Checking files for package %s", pkg.name) + populate_files(dbpkg, pkg, force=force) - logger.info('Finished updating Arch: %s', archname) + logger.info('Finished updating arch: %s', archname) def parse_info(iofile): @@ -490,6 +505,8 @@ def read_repo(primary_arch, repo_file, options): """ # always returns an Arch object, regardless of what is passed in primary_arch = locate_arch(primary_arch) + force = options.get('force', False) + filesonly = options.get('filesonly', False) repo, packages = parse_repo(repo_file) @@ -509,7 +526,10 @@ def read_repo(primary_arch, repo_file, options): logger.info('Starting database updates for %s.', repo_file) for arch in sorted(packages_arches.keys()): - db_update(arch, repo, packages_arches[arch], options) + if filesonly: + filesonly_update(arch, repo, packages_arches[arch], force) + else: + db_update(arch, repo, packages_arches[arch], force) logger.info('Finished database updates for %s.', repo_file) return 0 -- cgit v1.2.3-2-g168b From 04e830a1dad665da0b05c3c8349eff21667d805d Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 30 Nov 2011 13:11:14 -0600 Subject: Add initial migration for devel app Signed-off-by: Dan McGee --- devel/migrations/0001_initial.py | 18 ++++++++++++++++++ devel/migrations/__init__.py | 0 2 files changed, 18 insertions(+) create mode 100644 devel/migrations/0001_initial.py create mode 100644 devel/migrations/__init__.py (limited to 'devel') diff --git a/devel/migrations/0001_initial.py b/devel/migrations/0001_initial.py new file mode 100644 index 00000000..c28fc20f --- /dev/null +++ b/devel/migrations/0001_initial.py @@ -0,0 +1,18 @@ +# encoding: utf-8 +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + +class Migration(SchemaMigration): + + def forwards(self, orm): + pass + + + def backwards(self, orm): + pass + + + models = {} + + complete_apps = ['devel'] diff --git a/devel/migrations/__init__.py b/devel/migrations/__init__.py new file mode 100644 index 00000000..e69de29b -- cgit v1.2.3-2-g168b From da61fed4be6a28c870580f448c7c486f46d7b088 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 30 Nov 2011 13:31:51 -0600 Subject: Add master key model and admin integration --- devel/admin.py | 12 +++++ devel/migrations/0002_auto__add_masterkey.py | 76 ++++++++++++++++++++++++++++ devel/models.py | 17 +++++++ 3 files changed, 105 insertions(+) create mode 100644 devel/admin.py create mode 100644 devel/migrations/0002_auto__add_masterkey.py (limited to 'devel') diff --git a/devel/admin.py b/devel/admin.py new file mode 100644 index 00000000..84082fb8 --- /dev/null +++ b/devel/admin.py @@ -0,0 +1,12 @@ +from django.contrib import admin + +from .models import MasterKey + + +class MasterKeyAdmin(admin.ModelAdmin): + list_display = ('pgp_key', 'owner', 'created', 'revoker', 'revoked') + search_fields = ('pgp_key', 'owner', 'revoker') + +admin.site.register(MasterKey, MasterKeyAdmin) + +# vim: set ts=4 sw=4 et: diff --git a/devel/migrations/0002_auto__add_masterkey.py b/devel/migrations/0002_auto__add_masterkey.py new file mode 100644 index 00000000..ac1f745a --- /dev/null +++ b/devel/migrations/0002_auto__add_masterkey.py @@ -0,0 +1,76 @@ +# encoding: utf-8 +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + +class Migration(SchemaMigration): + + depends_on = ( + ("main", "0051_auto__chg_field_userprofile_pgp_key"), + ) + + def forwards(self, orm): + # Adding model 'MasterKey' + db.create_table('devel_masterkey', ( + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('owner', self.gf('django.db.models.fields.related.ForeignKey')(related_name='masterkey_owner', to=orm['auth.User'])), + ('revoker', self.gf('django.db.models.fields.related.ForeignKey')(related_name='masterkey_revoker', to=orm['auth.User'])), + ('pgp_key', self.gf('main.fields.PGPKeyField')(max_length=40)), + ('created', self.gf('django.db.models.fields.DateTimeField')()), + ('revoked', self.gf('django.db.models.fields.DateTimeField')(null=True, blank=True)), + )) + db.send_create_signal('devel', ['MasterKey']) + + def backwards(self, orm): + db.delete_table('devel_masterkey') + + + models = { + 'auth.group': { + 'Meta': {'object_name': 'Group'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}), + 'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}) + }, + 'auth.permission': { + 'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'}, + 'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '50'}) + }, + 'auth.user': { + 'Meta': {'object_name': 'User'}, + 'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), + 'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}), + 'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), + 'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), + 'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), + 'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), + 'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), + 'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}), + 'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'}) + }, + 'contenttypes.contenttype': { + 'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"}, + 'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}) + }, + 'devel.masterkey': { + 'Meta': {'object_name': 'MasterKey'}, + 'created': ('django.db.models.fields.DateTimeField', [], {}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'owner': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'masterkey_owner'", 'to': "orm['auth.User']"}), + 'pgp_key': ('main.fields.PGPKeyField', [], {'max_length': '40'}), + 'revoked': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), + 'revoker': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'masterkey_revoker'", 'to': "orm['auth.User']"}) + } + } + + complete_apps = ['devel'] diff --git a/devel/models.py b/devel/models.py index e69de29b..ea0f4d55 100644 --- a/devel/models.py +++ b/devel/models.py @@ -0,0 +1,17 @@ +from django.db import models +from django.contrib.auth.models import User + +from main.fields import PGPKeyField + + +class MasterKey(models.Model): + owner = models.ForeignKey(User, related_name='masterkey_owner', + help_text="The developer holding this master key") + revoker = models.ForeignKey(User, related_name='masterkey_revoker', + help_text="The developer holding the revocation certificate") + pgp_key = PGPKeyField(max_length=40, verbose_name="PGP key fingerprint", + help_text="consists of 40 hex digits; use `gpg --fingerprint`") + created = models.DateTimeField() + revoked = models.DateTimeField(null=True, blank=True) + +# vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From 4590196d79273c49172e2da74e7a7b31e59d7a27 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 30 Nov 2011 14:07:35 -0600 Subject: Integrate master key into rest of site Signed-off-by: Dan McGee --- devel/management/commands/generate_keyring.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'devel') diff --git a/devel/management/commands/generate_keyring.py b/devel/management/commands/generate_keyring.py index 35ab8874..a3a764b4 100644 --- a/devel/management/commands/generate_keyring.py +++ b/devel/management/commands/generate_keyring.py @@ -13,6 +13,7 @@ import logging import subprocess import sys +from devel.models import MasterKey from main.models import UserProfile logging.basicConfig( @@ -48,11 +49,14 @@ def generate_keyring(keyserver, keyring): pgp_key__isnull=False).extra(where=["pgp_key != ''"]).values_list( "pgp_key", flat=True) logger.info("%d keys fetched from user profiles", len(key_ids)) + master_key_ids = MasterKey.objects.values_list("pgp_key", flat=True) + logger.info("%d keys fetched from master keys", len(master_key_ids)) gpg_cmd = ["gpg", "--no-default-keyring", "--keyring", keyring, "--keyserver", keyserver, "--recv-keys"] logger.info("running command: %r", gpg_cmd) gpg_cmd.extend(key_ids) + gpg_cmd.extend(master_key_ids) subprocess.check_call(gpg_cmd) logger.info("keyring at %s successfully updated", keyring) -- cgit v1.2.3-2-g168b From 7c84bea7dabdfbc307d373620b00214777d91a97 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 30 Nov 2011 14:25:51 -0600 Subject: Allow generation of an ownertrust file Signed-off-by: Dan McGee --- devel/management/commands/generate_keyring.py | 29 ++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'devel') diff --git a/devel/management/commands/generate_keyring.py b/devel/management/commands/generate_keyring.py index a3a764b4..062c738b 100644 --- a/devel/management/commands/generate_keyring.py +++ b/devel/management/commands/generate_keyring.py @@ -24,7 +24,7 @@ logging.basicConfig( logger = logging.getLogger() class Command(BaseCommand): - args = " " + args = " [ownertrust_path]" help = "Assemble a GPG keyring with all known developer keys." def handle(self, *args, **options): @@ -36,10 +36,14 @@ class Command(BaseCommand): elif v == 2: logger.level = logging.DEBUG - if len(args) != 2: + if len(args) < 2: raise CommandError("keyserver and keyring_path must be provided") - return generate_keyring(args[0], args[1]) + generate_keyring(args[0], args[1]) + + if len(args) > 2: + generate_ownertrust(args[2]) + def generate_keyring(keyserver, keyring): logger.info("getting all known key IDs") @@ -60,4 +64,23 @@ def generate_keyring(keyserver, keyring): subprocess.check_call(gpg_cmd) logger.info("keyring at %s successfully updated", keyring) + +TRUST_LEVELS = { + 'unknown': 0, + 'expired': 1, + 'undefined': 2, + 'never': 3, + 'marginal': 4, + 'fully': 5, + 'ultimate': 6, +} + + +def generate_ownertrust(trust_path): + master_key_ids = MasterKey.objects.values_list("pgp_key", flat=True) + with open(trust_path, "w") as trustfile: + for key_id in master_key_ids: + trustfile.write("%s:%d:\n" % (key_id, TRUST_LEVELS['marginal'])) + logger.info("trust file at %s created or overwritten", trust_path) + # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From 025042ef0d23378a5f10fe1cff8f660f51638615 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 30 Nov 2011 14:45:05 -0600 Subject: Master keys: reduce query count, add default ordering Signed-off-by: Dan McGee --- devel/models.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'devel') diff --git a/devel/models.py b/devel/models.py index ea0f4d55..f31b8fbb 100644 --- a/devel/models.py +++ b/devel/models.py @@ -14,4 +14,7 @@ class MasterKey(models.Model): created = models.DateTimeField() revoked = models.DateTimeField(null=True, blank=True) + class Meta: + ordering = ('created',) + # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From 4d02cd5b5d4437dd1543e2d45044db72da1989f4 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 30 Nov 2011 23:56:12 -0600 Subject: reporead: fix not defined variable Way to fail at refactoring, Dan. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'devel') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index e4ba8580..c444538b 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -329,6 +329,8 @@ def db_update(archname, reponame, pkgs, force=False): """ logger.info('Updating %s (%s)', reponame, archname) dbpkgs = update_common(archname, reponame, pkgs, True) + repository = Repo.objects.get(name__iexact=reponame) + architecture = Arch.objects.get(name__iexact=archname) # This makes our inner loop where we find packages by name *way* more # efficient by not having to go to the database for each package to -- cgit v1.2.3-2-g168b From 623e0453cee5e3f663a0b18d68db0396cc812983 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 1 Dec 2011 23:35:41 -0500 Subject: I think this fixes all the broken links (that point to parabolagnulinux.org anyway) --- devel/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'devel') diff --git a/devel/views.py b/devel/views.py index 7cc45419..b9bd7cce 100644 --- a/devel/views.py +++ b/devel/views.py @@ -292,7 +292,7 @@ class NewUserForm(forms.ModelForm): send_mail("Your new parabolaweb account", template.render(ctx), - 'Parabola ', + 'Parabola ', [user.email], fail_silently=False) -- cgit v1.2.3-2-g168b