From 2d5777b11d229d115a31a6c82236570002c2dd57 Mon Sep 17 00:00:00 2001
From: Dan McGee
Date: Fri, 21 Oct 2011 18:49:00 -0500
Subject: Add a generate_keyring command

This grabs all the PGP keys from the developer profiles and adds them to
the keyrings. Obviously we may want to do more in the future, such as
filtering by groups, active status, etc., but this is just a first
iteration.

Signed-off-by: Dan McGee
---
 devel/management/commands/generate_keyring.py | 59 +++++++++++++++++++++++++++
 devel/management/commands/rematch_packager.py | 2 +-
 2 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 devel/management/commands/generate_keyring.py
(limited to 'devel/management/commands')

diff --git a/devel/management/commands/generate_keyring.py b/devel/management/commands/generate_keyring.py
new file mode 100644
index 00000000..b95d5a8e
--- /dev/null
+++ b/devel/management/commands/generate_keyring.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+"""
+generate_keyring command
+
+Assemble a GPG keyring with all known developer keys.
+
+Usage: ./manage.py generate_keyring <keyserver> <keyring_path>
+"""
+
+from django.core.management.base import BaseCommand, CommandError
+from django.db.models import Q
+
+import logging
+import subprocess
+import sys
+
+from main.models import UserProfile
+
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s -> %(levelname)s: %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S',
+    stream=sys.stderr)
+logger = logging.getLogger()
+
+class Command(BaseCommand):
+    args = "<keyserver> <keyring_path>"
+    help = "Assemble a GPG keyring with all known developer keys."
+
+    def handle(self, *args, **options):
+        v = int(options.get('verbosity', None))
+        if v == 0:
+            logger.level = logging.ERROR
+        elif v == 1:
+            logger.level = logging.INFO
+        elif v == 2:
+            logger.level = logging.DEBUG
+
+        if len(args) != 2:
+            raise CommandError("keyserver and keyring_path must be provided")
+
+        return generate_keyring(args[0], args[1])
+
+def generate_keyring(keyserver, keyring):
+    logger.info("getting all known key IDs")
+
+    exclude = Q(pgp_key__isnull=True) & Q(pgp_key__exact="")
+    key_ids = UserProfile.objects.exclude(
+        exclude).values_list("pgp_key", flat=True)
+    logger.info("%d keys fetched from user profiles", len(key_ids))
+
+    gpg_cmd = ["gpg", "--no-default-keyring", "--keyring", keyring,
+            "--keyserver", keyserver, "--recv-keys"]
+    logger.info("running command: %r", gpg_cmd)
+    gpg_cmd.extend(key_ids)
+    subprocess.check_call(gpg_cmd)
+    logger.info("keyring at %s successfully updated", keyring)
+
+# vim: set ts=4 sw=4 et:
diff --git a/devel/management/commands/rematch_packager.py b/devel/management/commands/rematch_packager.py
index ba6e6a54..461d83ab 100644
--- a/devel/management/commands/rematch_packager.py
+++ b/devel/management/commands/rematch_packager.py
@@ -24,7 +24,7 @@ logging.basicConfig(
 logger = logging.getLogger()
 
 class Command(NoArgsCommand):
-    help = "Runs a check on all active mirror URLs to determine if they are reachable via IPv4 and/or v6."
+    help = "Match all packages with a packager_str but NULL packager_id to a packager if we can find one."
 
     def handle_noargs(self, **options):
         v = int(options.get('verbosity', None))
-- cgit v1.2.3-2-g168b

From 1b83844b30d3271b8fb50757d827c7b8fe8b5585 Mon Sep 17 00:00:00 2001
From: Dan McGee
Date: Wed, 26 Oct 2011 03:25:15 -0500
Subject: Ensure PGP signature values are not trimmed

Trimming makes them totally unusable for any real purpose down the road.
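For scale, a detached PGP signature pulled from a sync database is a
base64 blob that routinely exceeds the 254-character cap applied to the
other bare fields, so the generic truncation path silently corrupts it.
A minimal sketch of the failure mode (illustrative values only, not part
of the patch itself):

    # hypothetical stand-in for a parsed %PGPSIG% value
    sig = 'iQEcBAABAgAGBQJO' * 20   # base64-like blob, 320 characters
    stored = sig[:254]              # what the generic bare-field path keeps
    print(len(sig), len(stored))    # 320 254 -> truncated and unverifiable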
Signed-off-by: Dan McGee
---
 devel/management/commands/reporead.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
(limited to 'devel/management/commands')

diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py
index cf597577..a8e3219e 100644
--- a/devel/management/commands/reporead.py
+++ b/devel/management/commands/reporead.py
@@ -73,7 +73,7 @@ class Command(BaseCommand):
 class Pkg(object):
     """An interim 'container' object for holding Arch package data."""
     bare = ( 'name', 'base', 'arch', 'desc', 'filename',
-            'md5sum', 'sha256sum', 'pgpsig', 'url', 'packager' )
+            'md5sum', 'sha256sum', 'url', 'packager' )
     number = ( 'csize', 'isize' )
     collections = ( 'depends', 'optdepends', 'conflicts',
             'provides', 'replaces', 'groups', 'license', 'files' )
@@ -85,6 +85,7 @@ class Pkg(object):
         self.ver = None
         self.rel = None
         self.epoch = 0
+        self.pgpsig = None
         for k in self.bare + self.number:
             setattr(self, k, None)
         for k in self.collections:
@@ -99,6 +100,9 @@ class Pkg(object):
                 setattr(self, k, v[0][:254])
             elif k in self.number:
                 setattr(self, k, long(v[0]))
+            elif k == 'pgpsig':
+                # do NOT prune this value at all
+                setattr(self, k, v[0])
             elif k == 'version':
                 match = self.version_re.match(v[0])
                 self.ver = match.group(3)
-- cgit v1.2.3-2-g168b

From ade2c08899abf77f6d836432c9988ad3f9652a95 Mon Sep 17 00:00:00 2001
From: Dan McGee
Date: Tue, 1 Nov 2011 16:47:37 -0500
Subject: Really ensure we don't catch any NULL or blank values

Fuck you too, Django.

Signed-off-by: Dan McGee
---
 devel/management/commands/generate_keyring.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
(limited to 'devel/management/commands')

diff --git a/devel/management/commands/generate_keyring.py b/devel/management/commands/generate_keyring.py
index b95d5a8e..35ab8874 100644
--- a/devel/management/commands/generate_keyring.py
+++ b/devel/management/commands/generate_keyring.py
@@ -8,7 +8,6 @@ Usage: ./manage.py generate_keyring <keyserver> <keyring_path>
 """
 
 from django.core.management.base import BaseCommand, CommandError
-from django.db.models import Q
 
 import logging
 import subprocess
@@ -44,9 +43,10 @@ class Command(BaseCommand):
 def generate_keyring(keyserver, keyring):
     logger.info("getting all known key IDs")
 
-    exclude = Q(pgp_key__isnull=True) & Q(pgp_key__exact="")
-    key_ids = UserProfile.objects.exclude(
-        exclude).values_list("pgp_key", flat=True)
+    # Screw you Django, for not letting one natively do value != <blank>
+    key_ids = UserProfile.objects.filter(user__is_active=True,
+        pgp_key__isnull=False).extra(where=["pgp_key != ''"]).values_list(
+        "pgp_key", flat=True)
     logger.info("%d keys fetched from user profiles", len(key_ids))
 
     gpg_cmd = ["gpg", "--no-default-keyring", "--keyring", keyring,
             "--keyserver", keyserver, "--recv-keys"]
-- cgit v1.2.3-2-g168b

From 9550236a87fc65827e994bea108350a43d3f161f Mon Sep 17 00:00:00 2001
From: Dan McGee
Date: Tue, 15 Nov 2011 21:26:57 -0600
Subject: Improve primary arch validation

Ensure we can accept either an Arch object or an architecture name when
passed to read_repo() by moving the validation there and being a bit
more careful about typechecking and object lookup.
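In other words, both of the calls below should now resolve to the same
Arch object inside read_repo(); a sketch using the sample database path
from the reporead usage text:

    read_repo('i686', '/tmp/core.db.tar.gz', {})
    read_repo(Arch.objects.get(name__iexact='i686'), '/tmp/core.db.tar.gz', {})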
Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'devel/management/commands') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index a8e3219e..b4966834 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -51,8 +51,6 @@ class Command(BaseCommand): def handle(self, arch=None, filename=None, **options): if not arch: raise CommandError('Architecture is required.') - if not validate_arch(arch): - raise CommandError('Specified architecture %s is not currently known.' % arch) if not filename: raise CommandError('Package database file is required.') filename = os.path.normpath(filename) @@ -464,29 +462,38 @@ def parse_repo(repopath): logger.info("Finished repo parsing, %d total packages", len(pkgs)) return (reponame, pkgs.values()) -def validate_arch(archname): +def locate_arch(arch): "Check if arch is valid." - return Arch.objects.filter(name__iexact=archname).exists() + if isinstance(arch, Arch): + return arch + try: + return Arch.objects.get(name__iexact=arch) + except Arch.DoesNotExist: + raise CommandError( + 'Specified architecture %s is not currently known.' % arch) + def read_repo(primary_arch, repo_file, options): """ Parses repo.db.tar.gz file and returns exit status. """ + # always returns an Arch object, regardless of what is passed in + primary_arch = locate_arch(primary_arch) + repo, packages = parse_repo(repo_file) # group packages by arch -- to handle noarch stuff packages_arches = {} for arch in Arch.objects.filter(agnostic=True): packages_arches[arch.name] = [] - packages_arches[primary_arch] = [] + packages_arches[primary_arch.name] = [] for package in packages: if package.arch in packages_arches: packages_arches[package.arch].append(package) else: # we don't include mis-arched packages - logger.warning("Package %s arch = %s", - package.name, package.arch) + logger.warning("Package %s arch = %s", package.name, package.arch) del packages logger.info('Starting database updates.') -- cgit v1.2.3-2-g168b From 2a2df0074e39a797a0a4b5f7db7cfc9097301328 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 15 Nov 2011 21:28:42 -0600 Subject: Add new reporead_inotify management command This is the new on-the-fly updates hotness. Rather than continue to schedule reporead to run once an hour in cron or however else you ran it, this command can be run once and left running, and will automagically pick up on any database file changes and run an import. It operates on the files databases only; this will keep both the packages and files always in sync and remove the delay in updating, especially helpful for new testing packages. Signed-off-by: Dan McGee --- devel/management/commands/reporead_inotify.py | 188 ++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100755 devel/management/commands/reporead_inotify.py (limited to 'devel/management/commands') diff --git a/devel/management/commands/reporead_inotify.py b/devel/management/commands/reporead_inotify.py new file mode 100755 index 00000000..135c0367 --- /dev/null +++ b/devel/management/commands/reporead_inotify.py @@ -0,0 +1,188 @@ +# -*- coding: utf-8 -*- +""" +reporead_inotify command + +Watches repo.files.tar.gz files for updates and parses them after a short delay +in order to catch all updates in a single bulk update. 
+
+Usage: ./manage.py reporead_inotify [path_template]
+
+Where 'path_template' is an optional path template for finding the
+repo.files.tar.gz files. The form is '/srv/ftp/%(repo)s/os/%(arch)s/', which is
+also the default template if none is specified. While 'repo' is not required to
+be present in the path_template, note that 'arch' is so reporead can function
+correctly.
+"""
+
+import logging
+import os.path
+import pyinotify
+import sys
+import threading
+import time
+
+from django.core.management.base import BaseCommand, CommandError
+
+from main.models import Arch, Repo
+from .reporead import read_repo
+
+logging.basicConfig(
+    level=logging.WARNING,
+    format='%(asctime)s -> %(levelname)s: %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S',
+    stream=sys.stderr)
+logger = logging.getLogger()
+
+class Command(BaseCommand):
+    help = "Watch database files and run an update when necessary."
+    args = "[path_template]"
+
+    def handle(self, path_template=None, **options):
+        v = int(options.get('verbosity', 0))
+        if v == 0:
+            logger.level = logging.ERROR
+        elif v == 1:
+            logger.level = logging.INFO
+        elif v == 2:
+            logger.level = logging.DEBUG
+
+        if not path_template:
+            path_template = '/srv/ftp/%(repo)s/os/%(arch)s/'
+        self.path_template = path_template
+
+        notifier = self.setup_notifier()
+        logger.info('Entering notifier loop')
+        notifier.loop()
+
+    def setup_notifier(self):
+        '''Set up and configure the inotify machinery and logic.
+        This takes the provided or default path_template and builds a list of
+        directories we need to watch for database updates. It then validates
+        and passes these on to the various pyinotify pieces as necessary and
+        finally builds and returns a notifier object.'''
+        arches = Arch.objects.filter(agnostic=False)
+        repos = Repo.objects.all()
+        arch_path_map = dict((arch, None) for arch in arches)
+        all_paths = set()
+        total_paths = 0
+        for arch in arches:
+            combos = ({ 'repo': repo.name.lower(), 'arch': arch.name }
+                    for repo in repos)
+            # take a python format string and generate all unique combinations
+            # of directories from it; using set() ensures we filter it down
+            paths = set(self.path_template % values for values in combos)
+            total_paths += len(paths)
+            all_paths |= paths
+            arch_path_map[arch] = paths
+
+        logger.info('Watching %d total paths', total_paths)
+        logger.debug(all_paths)
+
+        # sanity check- basically ensure every path we created from the
+        # template mapped to only one architecture
+        if total_paths != len(all_paths):
+            raise CommandError('path template did not uniquely '
+                    'determine architecture for each file')
+
+        # A proper atomic replacement of the database as done by rsync is type
+        # IN_MOVED_TO. repo-add/remove will finish with an IN_CLOSE_WRITE.
+        mask = pyinotify.IN_CLOSE_WRITE | pyinotify.IN_MOVED_TO
+
+        manager = pyinotify.WatchManager()
+        for name in all_paths:
+            manager.add_watch(name, mask)
+
+        handler = EventHandler(arch_paths=arch_path_map)
+        return pyinotify.Notifier(manager, handler)
+
+
+class Database(object):
+    '''An object representing a pacman database on the filesystem.
It stores + various bits of metadata and state representing the file path, when we last + updated, how long our delay is before performing the update, whether we are + updating now, etc.''' + def __init__(self, arch, path, delay=60.0): + self.arch = arch + self.path = path + self.delay = delay + self.mtime = None + self.last_import = None + self.update_thread = None + self.updating = False + self.run_again = False + self.lock = threading.Lock() + + def _start_update_countdown(self): + self.update_thread = threading.Timer(self.delay, self.update) + logger.info('Starting %.1f second countdown to update %s', + self.delay, self.path) + self.update_thread.start() + + def queue_for_update(self, mtime): + logger.debug('Queueing database %s...', self.path) + with self.lock: + self.mtime = mtime + if self.updating: + # store the fact that we will need to run it again + self.run_again = True + return + if self.update_thread: + self.update_thread.cancel() + self._start_update_countdown() + + def update(self): + logger.debug('Updating database %s...', self.path) + with self.lock: + self.last_import = time.time() + self.updating = True + + try: + # invoke reporead's primary method + read_repo(self.arch, self.path, {}) + finally: + logger.debug('Done updating database %s.', self.path) + with self.lock: + self.update_thread = None + self.updating = False + if self.run_again: + self.run_again = False + self._start_update_countdown() + + +class EventHandler(pyinotify.ProcessEvent): + '''Our main event handler which listens for database change events. Because + we are watching the whole directory, we filter down and only look at those + events dealing with files databases.''' + + def my_init(self, **kwargs): + self.databases = {} + self.arch_lookup = {} + + # we really want a single path to arch mapping, so massage the data + arch_paths = kwargs['arch_paths'] + for arch, paths in arch_paths.items(): + self.arch_lookup.update((path.rstrip('/'), arch) for path in paths) + + def process_default(self, event): + '''Primary event processing function which kicks off reporead timer + threads if a files database was updated.''' + if not event.name: + return + # screen to only the files we care about + if event.name.endswith('.files.tar.gz'): + path = event.pathname + stat = os.stat(path) + database = self.databases.get(path, None) + if database is None: + arch = self.arch_lookup.get(event.path, None) + if arch is None: + logger.warning( + 'Could not determine arch for %s, skipping update', + path) + return + database = Database(arch, path) + self.databases[path] = database + database.queue_for_update(stat.st_mtime) + + +# vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From d3b36e1ce992a8b70f4fe8fe9e8df74e835fb865 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 15 Nov 2011 22:03:01 -0600 Subject: reporead_inotify: cancel threads that haven't started yet on shutdown Signed-off-by: Dan McGee --- devel/management/commands/reporead_inotify.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'devel/management/commands') diff --git a/devel/management/commands/reporead_inotify.py b/devel/management/commands/reporead_inotify.py index 135c0367..4c865ce1 100755 --- a/devel/management/commands/reporead_inotify.py +++ b/devel/management/commands/reporead_inotify.py @@ -54,6 +54,11 @@ class Command(BaseCommand): logger.info('Entering notifier loop') notifier.loop() + logger.info('Cancelling remaining threads...') + for thread in threading.enumerate(): + if hasattr(thread, 'cancel'): + thread.cancel() + def 
setup_notifier(self): '''Set up and configure the inotify machinery and logic. This takes the provided or default path_template and builds a list of -- cgit v1.2.3-2-g168b From c00e7e84045613ee2aa80f66b9972db971ab3f26 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 15 Nov 2011 23:43:42 -0600 Subject: reporead: clean up some debug logging Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'devel/management/commands') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index b4966834..45229524 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -36,6 +36,8 @@ logging.basicConfig( format='%(asctime)s -> %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S', stream=sys.stderr) +TRACE = 5 +logging.addLevelName(TRACE, 'TRACE') logger = logging.getLogger() class Command(BaseCommand): @@ -368,7 +370,7 @@ def db_update(archname, reponame, pkgs, options): # packages in both database and in syncdb (update in database) pkg_in_both = syncset & dbset for p in [x for x in pkgs if x.name in pkg_in_both]: - logger.debug("Looking for package updates") + logger.debug("Checking package %s", p.name) dbp = dbdict[p.name] timestamp = None # for a force, we don't want to update the timestamp. @@ -406,7 +408,7 @@ def parse_info(iofile): continue elif line.startswith('%') and line.endswith('%'): blockname = line[1:-1].lower() - logger.debug("Parsing package block %s", blockname) + logger.log(TRACE, "Parsing package block %s", blockname) store[blockname] = [] elif blockname: store[blockname].append(line) @@ -456,7 +458,7 @@ def parse_repo(repopath): tarinfo.name) data_file.close() - logger.debug("Done parsing file %s", fname) + logger.debug("Done parsing file %s/%s", pkgid, fname) repodb.close() logger.info("Finished repo parsing, %d total packages", len(pkgs)) -- cgit v1.2.3-2-g168b From 404c4b400b2bd2a14e0363e33d66505c51903fe7 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 16 Nov 2011 12:48:36 -0600 Subject: reporead: a few small tweaks Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'devel/management/commands') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 45229524..ad76db4d 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -450,13 +450,14 @@ def parse_repo(repopath): continue data_file = repodb.extractfile(tarinfo) data_file = io.TextIOWrapper(io.BytesIO(data_file.read()), - encoding='utf=8') + encoding='UTF-8') try: pkgs[pkgid].populate(parse_info(data_file)) except UnicodeDecodeError: logger.warn("Could not correctly decode %s, skipping file", tarinfo.name) data_file.close() + del data_file logger.debug("Done parsing file %s/%s", pkgid, fname) @@ -498,10 +499,10 @@ def read_repo(primary_arch, repo_file, options): logger.warning("Package %s arch = %s", package.name, package.arch) del packages - logger.info('Starting database updates.') + logger.info('Starting database updates for %s.', repo_file) for arch in sorted(packages_arches.keys()): db_update(arch, repo, packages_arches[arch], options) - logger.info('Finished database updates.') + logger.info('Finished database updates for %s.', repo_file) return 0 # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From b1e406d73844d5a30344ca8ac855fe850c52bc2f Mon Sep 17 00:00:00 2001 From: Dan McGee 
Date: Wed, 16 Nov 2011 12:49:17 -0600
Subject: reporead_inotify: spin up read_repo() in separate process

This prevents memory usage from ballooning to absolutely huge values,
such as when multiple threads kick off at the same time. The bulk of our
memory allocation obviously comes in these threads and not the main
thread, so being able to isolate them in processes helps a lot.

Signed-off-by: Dan McGee
---
 devel/management/commands/reporead_inotify.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)
(limited to 'devel/management/commands')

diff --git a/devel/management/commands/reporead_inotify.py b/devel/management/commands/reporead_inotify.py
index 4c865ce1..ffd49b8f 100755
--- a/devel/management/commands/reporead_inotify.py
+++ b/devel/management/commands/reporead_inotify.py
@@ -15,6 +15,7 @@ correctly.
 """
 
 import logging
+import multiprocessing
 import os.path
 import pyinotify
 import sys
@@ -133,6 +134,7 @@ class Database(object):
             return
         if self.update_thread:
             self.update_thread.cancel()
+            self.update_thread = None
         self._start_update_countdown()
 
     def update(self):
@@ -142,8 +144,13 @@ class Database(object):
             self.updating = True
 
         try:
-            # invoke reporead's primary method
-            read_repo(self.arch, self.path, {})
+            # invoke reporead's primary method. we do this in a separate
+            # process for memory conservation purposes; these processes grow
+            # rather large so it is best to free up the memory ASAP.
+            process = multiprocessing.Process(target=read_repo,
+                    args=[self.arch, self.path, {}])
+            process.start()
+            process.join()
         finally:
             logger.debug('Done updating database %s.', self.path)
             with self.lock:
-- cgit v1.2.3-2-g168b

From aa20c798ca8af365b2549591700e932a74d068b8 Mon Sep 17 00:00:00 2001
From: Dan McGee
Date: Wed, 16 Nov 2011 13:02:35 -0600
Subject: reporead_inotify: close connection once we are done with it

This prevents an otherwise idle connection from sitting around and being
totally useless.

Signed-off-by: Dan McGee
---
 devel/management/commands/reporead_inotify.py | 6 ++++++
 1 file changed, 6 insertions(+)
(limited to 'devel/management/commands')

diff --git a/devel/management/commands/reporead_inotify.py b/devel/management/commands/reporead_inotify.py
index ffd49b8f..acb53a54 100755
--- a/devel/management/commands/reporead_inotify.py
+++ b/devel/management/commands/reporead_inotify.py
@@ -23,6 +23,7 @@ import threading
 import time
 
 from django.core.management.base import BaseCommand, CommandError
+from django.db import connection
 
 from main.models import Arch, Repo
 from .reporead import read_repo
@@ -90,6 +91,11 @@ class Command(BaseCommand):
             raise CommandError('path template did not uniquely '
                     'determine architecture for each file')
 
+        # this thread is done using the database; all future access is done in
+        # the spawned read_repo() processes, so close the otherwise completely
+        # idle connection.
+        connection.close()
+
         # A proper atomic replacement of the database as done by rsync is type
        # IN_MOVED_TO. repo-add/remove will finish with an IN_CLOSE_WRITE.
         mask = pyinotify.IN_CLOSE_WRITE | pyinotify.IN_MOVED_TO
-- cgit v1.2.3-2-g168b

From 9d2fdbe5bc6a0d9ab2907b377056851fc5eb56c3 Mon Sep 17 00:00:00 2001
From: Dan McGee
Date: Thu, 17 Nov 2011 10:17:55 -0600
Subject: reporead_inotify: nice the spawned subprocesses

This prevents the reporead job from taking over time from more important
processes; this is not a rush task.
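For reference, os.nice() only adjusts the niceness of the calling
process, which is why the increment has to happen inside the spawned
child rather than in the parent; a minimal standalone sketch (Unix-only,
illustrative values):

    import multiprocessing
    import os

    def child():
        os.nice(3)  # lower this process's scheduling priority only
        print('child niceness:', os.nice(0))  # os.nice(0) reads current value

    if __name__ == '__main__':
        multiprocessing.Process(target=child).start()  # parent stays at 0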
Signed-off-by: Dan McGee --- devel/management/commands/reporead_inotify.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'devel/management/commands') diff --git a/devel/management/commands/reporead_inotify.py b/devel/management/commands/reporead_inotify.py index acb53a54..c74762eb 100755 --- a/devel/management/commands/reporead_inotify.py +++ b/devel/management/commands/reporead_inotify.py @@ -16,7 +16,7 @@ correctly. import logging import multiprocessing -import os.path +import os import pyinotify import sys import threading @@ -113,10 +113,11 @@ class Database(object): various bits of metadata and state representing the file path, when we last updated, how long our delay is before performing the update, whether we are updating now, etc.''' - def __init__(self, arch, path, delay=60.0): + def __init__(self, arch, path, delay=60.0, nice=3): self.arch = arch self.path = path self.delay = delay + self.nice = nice self.mtime = None self.last_import = None self.update_thread = None @@ -153,8 +154,12 @@ class Database(object): # invoke reporead's primary method. we do this in a separate # process for memory conservation purposes; these processes grow # rather large so it is best to free up the memory ASAP. - process = multiprocessing.Process(target=read_repo, - args=[self.arch, self.path, {}]) + def run(): + if self.nice != 0: + os.nice(self.nice) + read_repo(self.arch, self.path, {}) + + process = multiprocessing.Process(target=run) process.start() process.join() finally: -- cgit v1.2.3-2-g168b From a9819e3d715ce3e5c20c9665db9a6100f06ab562 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 17 Nov 2011 12:34:12 -0600 Subject: Ensure reporead is protected against simultaneous runs This adds a bunch of transaction magic and SELECT FOR UPDATE stuff to reporead to cope with the now-concurrent runs of reporead we get when invoked from our inotify-based updater. The collision occurs with 'any' architecture packages as both repo databases contain the new version, and the updates occur at exactly the same time. 
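The heart of the change is row-level locking; condensed into a sketch
(the real implementation in the diff below adds the sqlite special case
and error handling):

    # lock the row, then re-check whether a concurrent run already imported it
    with transaction.commit_on_success():
        dbpkg = list(Package.objects.raw(
            'SELECT * FROM packages WHERE id = %s FOR UPDATE', [dbpkg.id]))[0]
        if pkg_same_version(pkg, dbpkg):
            pass  # the other reporead run won the race; nothing left to do
        else:
            populate_pkg(dbpkg, pkg, force=force, timestamp=timestamp)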
Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 206 +++++++++++++++++----------------- 1 file changed, 106 insertions(+), 100 deletions(-) (limited to 'devel/management/commands') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index ad76db4d..b6bd8457 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -13,10 +13,6 @@ Example: ./manage.py reporead i686 /tmp/core.db.tar.gz """ -from django.core.management.base import BaseCommand, CommandError -from django.contrib.auth.models import User -from django.db import transaction - from collections import defaultdict import io import os @@ -27,6 +23,11 @@ import logging from datetime import datetime from optparse import make_option +from django.core.management.base import BaseCommand, CommandError +from django.contrib.auth.models import User +from django.db import connections, router, transaction +from django.db.utils import IntegrityError + from devel.utils import UserFinder from main.models import Arch, Package, PackageDepend, PackageFile, Repo from packages.models import Conflict, Provision, Replacement @@ -189,8 +190,6 @@ def create_multivalued(dbpkg, repopkg, db_attr, repo_attr): finder = UserFinder() def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): - db_score = 1 - if repopkg.base: dbpkg.pkgbase = repopkg.base else: @@ -214,7 +213,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.last_update = timestamp dbpkg.save() - db_score += populate_files(dbpkg, repopkg, force=force) + populate_files(dbpkg, repopkg, force=force) dbpkg.packagedepend_set.all().delete() for y in repopkg.depends: @@ -235,28 +234,23 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): create_multivalued(dbpkg, repopkg, 'groups', 'groups') create_multivalued(dbpkg, repopkg, 'licenses', 'license') - related_score = (len(repopkg.depends) + len(repopkg.optdepends) - + len(repopkg.conflicts) + len(repopkg.provides) - + len(repopkg.replaces) + len(repopkg.groups) - + len(repopkg.license)) - if related_score: - db_score += (related_score / 20) + 1 - return db_score +pkg_same_version = lambda pkg, dbpkg: pkg.ver == dbpkg.pkgver \ + and pkg.rel == dbpkg.pkgrel and pkg.epoch == dbpkg.epoch def populate_files(dbpkg, repopkg, force=False): if not force: - if dbpkg.pkgver != repopkg.ver or dbpkg.pkgrel != repopkg.rel \ - or dbpkg.epoch != repopkg.epoch: + if not pkg_same_version(repopkg, dbpkg): logger.info("DB version (%s) didn't match repo version " "(%s) for package %s, skipping file list addition", dbpkg.full_version, repopkg.full_version, dbpkg.pkgname) - return 0 + return if not dbpkg.files_last_update or not dbpkg.last_update: pass elif dbpkg.files_last_update > dbpkg.last_update: - return 0 + return + # only delete files if we are reading a DB that contains them if repopkg.has_files: dbpkg.packagefile_set.all().delete() @@ -275,30 +269,19 @@ def populate_files(dbpkg, repopkg, force=False): pkgfile.save(force_insert=True) dbpkg.files_last_update = datetime.utcnow() dbpkg.save() - return (len(repopkg.files) / 50) + 1 - return 0 - -class Batcher(object): - def __init__(self, threshold, start=0): - self.threshold = threshold - self.meter = start - def batch_commit(self, score): - """ - Track updates to the database and perform a commit if the batch - becomes sufficiently large. "Large" is defined by waiting for the - sum of scores to exceed the arbitrary threshold value; once it is - hit a commit is issued. 
- """ - self.meter += score - if self.meter > self.threshold: - logger.debug("Committing transaction, batch threshold hit") - transaction.commit() - self.meter = 0 +def select_pkg_for_update(dbpkg): + database = router.db_for_write(Package, instance=dbpkg) + connection = connections[database] + if 'sqlite' in connection.settings_dict['ENGINE'].lower(): + return dbpkg + new_pkg = Package.objects.raw( + 'SELECT * FROM packages WHERE id = %s FOR UPDATE', + [dbpkg.id]) + return list(new_pkg)[0] -@transaction.commit_on_success def db_update(archname, reponame, pkgs, options): """ Parses a list and updates the Arch dev database accordingly. @@ -310,88 +293,111 @@ def db_update(archname, reponame, pkgs, options): logger.info('Updating Arch: %s', archname) force = options.get('force', False) filesonly = options.get('filesonly', False) - repository = Repo.objects.get(name__iexact=reponame) - architecture = Arch.objects.get(name__iexact=archname) - # no-arg order_by() removes even the default ordering; we don't need it - dbpkgs = Package.objects.filter( - arch=architecture, repo=repository).order_by() - # This makes our inner loop where we find packages by name *way* more - # efficient by not having to go to the database for each package to - # SELECT them by name. - dbdict = dict([(pkg.pkgname, pkg) for pkg in dbpkgs]) - - logger.debug("Creating sets") - dbset = set(dbdict.keys()) - syncset = set([pkg.name for pkg in pkgs]) - logger.info("%d packages in current web DB", len(dbset)) - logger.info("%d packages in new updating db", len(syncset)) - in_sync_not_db = syncset - dbset - logger.info("%d packages in sync not db", len(in_sync_not_db)) - - # Try to catch those random package deletions that make Eric so unhappy. - if len(dbset): - dbpercent = 100.0 * len(syncset) / len(dbset) - else: - dbpercent = 0.0 - logger.info("DB package ratio: %.1f%%", dbpercent) - - # Fewer than 20 packages makes the percentage check unreliable, but it also - # means we expect the repo to fluctuate a lot. - msg = "Package database has %.1f%% the number of packages in the " \ - "web database" % dbpercent - if len(dbset) == 0 and len(syncset) == 0: - pass - elif not filesonly and \ - len(dbset) > 20 and dbpercent < 50.0 and \ - not repository.testing and not repository.staging: - logger.error(msg) - raise Exception(msg) - elif dbpercent < 75.0: - logger.warning(msg) - - batcher = Batcher(100) + + with transaction.commit_manually(): + repository = Repo.objects.get(name__iexact=reponame) + architecture = Arch.objects.get(name__iexact=archname) + # no-arg order_by() removes even the default ordering; we don't need it + dbpkgs = Package.objects.filter( + arch=architecture, repo=repository).order_by() + # This makes our inner loop where we find packages by name *way* more + # efficient by not having to go to the database for each package to + # SELECT them by name. + dbdict = dict((dbpkg.pkgname, dbpkg) for dbpkg in dbpkgs) + + logger.debug("Creating sets") + dbset = set(dbdict.keys()) + syncset = set([pkg.name for pkg in pkgs]) + logger.info("%d packages in current web DB", len(dbset)) + logger.info("%d packages in new updating db", len(syncset)) + in_sync_not_db = syncset - dbset + logger.info("%d packages in sync not db", len(in_sync_not_db)) + + # Try to catch those random package deletions that make Eric so unhappy. 
+ if len(dbset): + dbpercent = 100.0 * len(syncset) / len(dbset) + else: + dbpercent = 0.0 + logger.info("DB package ratio: %.1f%%", dbpercent) + + # Fewer than 20 packages makes the percentage check unreliable, but it also + # means we expect the repo to fluctuate a lot. + msg = "Package database has %.1f%% the number of packages in the " \ + "web database" % dbpercent + if len(dbset) == 0 and len(syncset) == 0: + pass + elif not filesonly and \ + len(dbset) > 20 and dbpercent < 50.0 and \ + not repository.testing and not repository.staging: + logger.error(msg) + raise Exception(msg) + elif dbpercent < 75.0: + logger.warning(msg) + + # If isolation level is repeatable-read, we need to ensure each package + # update starts a new transaction and re-queries the database as necessary + # to guard against simultaneous updates + transaction.commit() if not filesonly: # packages in syncdb and not in database (add to database) - for p in [x for x in pkgs if x.name in in_sync_not_db]: - logger.info("Adding package %s", p.name) - pkg = Package(pkgname=p.name, arch=architecture, repo=repository) - score = populate_pkg(pkg, p, timestamp=datetime.utcnow()) - batcher.batch_commit(score) + for pkg in (pkg for pkg in pkgs if pkg.name in in_sync_not_db): + logger.info("Adding package %s", pkg.name) + dbpkg = Package(pkgname=pkg.name, arch=architecture, repo=repository) + try: + with transaction.commit_on_success(): + populate_pkg(dbpkg, pkg, timestamp=datetime.utcnow()) + except IntegrityError: + logger.warning("Could not add package %s; " + "not fatal if another thread beat us to it.", + pkg.name, exc_info=True) # packages in database and not in syncdb (remove from database) - in_db_not_sync = dbset - syncset - for p in in_db_not_sync: - logger.info("Removing package %s", p) - dbp = dbdict[p] - dbp.delete() - batcher.batch_commit(1) + for pkgname in (dbset - syncset): + logger.info("Removing package %s", pkgname) + dbpkg = dbdict[pkgname] + with transaction.commit_on_success(): + # no race condition here as long as simultaneous threads both + # issue deletes; second delete will be a no-op + dbpkg.delete() # packages in both database and in syncdb (update in database) pkg_in_both = syncset & dbset - for p in [x for x in pkgs if x.name in pkg_in_both]: - logger.debug("Checking package %s", p.name) - dbp = dbdict[p.name] + for pkg in (x for x in pkgs if x.name in pkg_in_both): + logger.debug("Checking package %s", pkg.name) + dbpkg = dbdict[pkg.name] timestamp = None # for a force, we don't want to update the timestamp. # for a non-force, we don't want to do anything at all. if filesonly: pass - elif p.ver == dbp.pkgver and p.rel == dbp.pkgrel \ - and p.epoch == dbp.epoch: + elif pkg_same_version(pkg, dbpkg): if not force: continue else: timestamp = datetime.utcnow() + # The odd select_for_update song and dance here are to ensure + # simultaneous updates don't happen on a package, causing + # files/depends/all related items to be double-imported. 
if filesonly: - logger.debug("Checking files for package %s", p.name) - score = populate_files(dbp, p, force=force) + with transaction.commit_on_success(): + # TODO Django 1.4 select_for_update() will work once released + dbpkg = select_pkg_for_update(dbpkg) + if pkg_same_version(pkg, dbpkg): + logger.debug("Package %s was already updated", pkg.name) + continue + logger.debug("Checking files for package %s", pkg.name) + populate_files(dbpkg, pkg, force=force) else: - logger.info("Updating package %s", p.name) - score = populate_pkg(dbp, p, force=force, timestamp=timestamp) - - batcher.batch_commit(score) + with transaction.commit_on_success(): + # TODO Django 1.4 select_for_update() will work once released + dbpkg = select_pkg_for_update(dbpkg) + if pkg_same_version(pkg, dbpkg): + logger.debug("Package %s was already updated", pkg.name) + continue + logger.info("Updating package %s", pkg.name) + populate_pkg(dbpkg, pkg, force=force, timestamp=timestamp) logger.info('Finished updating Arch: %s', archname) -- cgit v1.2.3-2-g168b From 2cb4f97bb235217d6e56deded1444f5e84f08b71 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 17 Nov 2011 13:36:27 -0600 Subject: reporead: don't trim pkgdesc length Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'devel/management/commands') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index b6bd8457..cf101d97 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -73,7 +73,7 @@ class Command(BaseCommand): class Pkg(object): """An interim 'container' object for holding Arch package data.""" - bare = ( 'name', 'base', 'arch', 'desc', 'filename', + bare = ( 'name', 'base', 'arch', 'filename', 'md5sum', 'sha256sum', 'url', 'packager' ) number = ( 'csize', 'isize' ) collections = ( 'depends', 'optdepends', 'conflicts', @@ -101,8 +101,8 @@ class Pkg(object): setattr(self, k, v[0][:254]) elif k in self.number: setattr(self, k, long(v[0])) - elif k == 'pgpsig': - # do NOT prune this value at all + elif k in ('desc', 'pgpsig'): + # do NOT prune these values at all setattr(self, k, v[0]) elif k == 'version': match = self.version_re.match(v[0]) -- cgit v1.2.3-2-g168b From ac34e358103ee718369692b9ba5afe6830a1df92 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 30 Nov 2011 12:25:54 -0600 Subject: reporead: fix filesonly needs update checks This was broken after the select for update changes. We really should split the whole filesonly update into another method instead of the current shotgun approach with conditionals everywhere. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'devel/management/commands') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index cf101d97..f8cc2034 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -382,11 +382,13 @@ def db_update(archname, reponame, pkgs, options): # files/depends/all related items to be double-imported. 
if filesonly: with transaction.commit_on_success(): + if not dbpkg.files_last_update or not dbpkg.last_update: + pass + elif dbpkg.files_last_update > dbpkg.last_update: + logger.debug("Files for %s are up to date", pkg.name) + continue # TODO Django 1.4 select_for_update() will work once released dbpkg = select_pkg_for_update(dbpkg) - if pkg_same_version(pkg, dbpkg): - logger.debug("Package %s was already updated", pkg.name) - continue logger.debug("Checking files for package %s", pkg.name) populate_files(dbpkg, pkg, force=force) else: -- cgit v1.2.3-2-g168b From dca9fd2ea687b8d20fd5c39c1449846ddef1e3c2 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 30 Nov 2011 13:04:15 -0600 Subject: reporead: split out filesonly update method This removes a bunch of the conditional logic at a slight cost of some code duplication. However, the methods and madness is now much easier to follow. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 170 +++++++++++++++++++--------------- 1 file changed, 95 insertions(+), 75 deletions(-) (limited to 'devel/management/commands') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index f8cc2034..e4ba8580 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -282,40 +282,20 @@ def select_pkg_for_update(dbpkg): return list(new_pkg)[0] -def db_update(archname, reponame, pkgs, options): - """ - Parses a list and updates the Arch dev database accordingly. - - Arguments: - pkgs -- A list of Pkg objects. - - """ - logger.info('Updating Arch: %s', archname) - force = options.get('force', False) - filesonly = options.get('filesonly', False) - +def update_common(archname, reponame, pkgs, sanity_check=True): with transaction.commit_manually(): repository = Repo.objects.get(name__iexact=reponame) architecture = Arch.objects.get(name__iexact=archname) # no-arg order_by() removes even the default ordering; we don't need it dbpkgs = Package.objects.filter( arch=architecture, repo=repository).order_by() - # This makes our inner loop where we find packages by name *way* more - # efficient by not having to go to the database for each package to - # SELECT them by name. - dbdict = dict((dbpkg.pkgname, dbpkg) for dbpkg in dbpkgs) - - logger.debug("Creating sets") - dbset = set(dbdict.keys()) - syncset = set([pkg.name for pkg in pkgs]) - logger.info("%d packages in current web DB", len(dbset)) - logger.info("%d packages in new updating db", len(syncset)) - in_sync_not_db = syncset - dbset - logger.info("%d packages in sync not db", len(in_sync_not_db)) + + logger.info("%d packages in current web DB", len(dbpkgs)) + logger.info("%d packages in new updating DB", len(pkgs)) # Try to catch those random package deletions that make Eric so unhappy. - if len(dbset): - dbpercent = 100.0 * len(syncset) / len(dbset) + if len(dbpkgs): + dbpercent = 100.0 * len(pkgs) / len(dbpkgs) else: dbpercent = 0.0 logger.info("DB package ratio: %.1f%%", dbpercent) @@ -324,11 +304,13 @@ def db_update(archname, reponame, pkgs, options): # means we expect the repo to fluctuate a lot. 
msg = "Package database has %.1f%% the number of packages in the " \ "web database" % dbpercent - if len(dbset) == 0 and len(syncset) == 0: + if not sanity_check: + pass + elif repository.testing or repository.staging: pass - elif not filesonly and \ - len(dbset) > 20 and dbpercent < 50.0 and \ - not repository.testing and not repository.staging: + elif len(dbpkgs) == 0 and len(pkgs) == 0: + pass + elif len(dbpkgs) > 20 and dbpercent < 50.0: logger.error(msg) raise Exception(msg) elif dbpercent < 75.0: @@ -339,27 +321,45 @@ def db_update(archname, reponame, pkgs, options): # to guard against simultaneous updates transaction.commit() - if not filesonly: - # packages in syncdb and not in database (add to database) - for pkg in (pkg for pkg in pkgs if pkg.name in in_sync_not_db): - logger.info("Adding package %s", pkg.name) - dbpkg = Package(pkgname=pkg.name, arch=architecture, repo=repository) - try: - with transaction.commit_on_success(): - populate_pkg(dbpkg, pkg, timestamp=datetime.utcnow()) - except IntegrityError: - logger.warning("Could not add package %s; " - "not fatal if another thread beat us to it.", - pkg.name, exc_info=True) - - # packages in database and not in syncdb (remove from database) - for pkgname in (dbset - syncset): - logger.info("Removing package %s", pkgname) - dbpkg = dbdict[pkgname] + return dbpkgs + +def db_update(archname, reponame, pkgs, force=False): + """ + Parses a list of packages and updates the packages database accordingly. + """ + logger.info('Updating %s (%s)', reponame, archname) + dbpkgs = update_common(archname, reponame, pkgs, True) + + # This makes our inner loop where we find packages by name *way* more + # efficient by not having to go to the database for each package to + # SELECT them by name. + dbdict = dict((dbpkg.pkgname, dbpkg) for dbpkg in dbpkgs) + + dbset = set(dbdict.keys()) + syncset = set([pkg.name for pkg in pkgs]) + + in_sync_not_db = syncset - dbset + logger.info("%d packages in sync not db", len(in_sync_not_db)) + # packages in syncdb and not in database (add to database) + for pkg in (pkg for pkg in pkgs if pkg.name in in_sync_not_db): + logger.info("Adding package %s", pkg.name) + dbpkg = Package(pkgname=pkg.name, arch=architecture, repo=repository) + try: with transaction.commit_on_success(): - # no race condition here as long as simultaneous threads both - # issue deletes; second delete will be a no-op - dbpkg.delete() + populate_pkg(dbpkg, pkg, timestamp=datetime.utcnow()) + except IntegrityError: + logger.warning("Could not add package %s; " + "not fatal if another thread beat us to it.", + pkg.name, exc_info=True) + + # packages in database and not in syncdb (remove from database) + for pkgname in (dbset - syncset): + logger.info("Removing package %s", pkgname) + dbpkg = dbdict[pkgname] + with transaction.commit_on_success(): + # no race condition here as long as simultaneous threads both + # issue deletes; second delete will be a no-op + dbpkg.delete() # packages in both database and in syncdb (update in database) pkg_in_both = syncset & dbset @@ -369,9 +369,7 @@ def db_update(archname, reponame, pkgs, options): timestamp = None # for a force, we don't want to update the timestamp. # for a non-force, we don't want to do anything at all. 
- if filesonly: - pass - elif pkg_same_version(pkg, dbpkg): + if pkg_same_version(pkg, dbpkg): if not force: continue else: @@ -380,28 +378,45 @@ def db_update(archname, reponame, pkgs, options): # The odd select_for_update song and dance here are to ensure # simultaneous updates don't happen on a package, causing # files/depends/all related items to be double-imported. - if filesonly: - with transaction.commit_on_success(): - if not dbpkg.files_last_update or not dbpkg.last_update: - pass - elif dbpkg.files_last_update > dbpkg.last_update: - logger.debug("Files for %s are up to date", pkg.name) - continue - # TODO Django 1.4 select_for_update() will work once released - dbpkg = select_pkg_for_update(dbpkg) - logger.debug("Checking files for package %s", pkg.name) - populate_files(dbpkg, pkg, force=force) - else: - with transaction.commit_on_success(): - # TODO Django 1.4 select_for_update() will work once released - dbpkg = select_pkg_for_update(dbpkg) - if pkg_same_version(pkg, dbpkg): - logger.debug("Package %s was already updated", pkg.name) - continue - logger.info("Updating package %s", pkg.name) - populate_pkg(dbpkg, pkg, force=force, timestamp=timestamp) + with transaction.commit_on_success(): + # TODO Django 1.4 select_for_update() will work once released + dbpkg = select_pkg_for_update(dbpkg) + if pkg_same_version(pkg, dbpkg): + logger.debug("Package %s was already updated", pkg.name) + continue + logger.info("Updating package %s", pkg.name) + populate_pkg(dbpkg, pkg, force=force, timestamp=timestamp) + + logger.info('Finished updating arch: %s', archname) + + +def filesonly_update(archname, reponame, pkgs, force=False): + """ + Parses a list of packages and updates the packages database accordingly. + """ + logger.info('Updating files for %s (%s)', reponame, archname) + dbpkgs = update_common(archname, reponame, pkgs, False) + dbdict = dict((dbpkg.pkgname, dbpkg) for dbpkg in dbpkgs) + dbset = set(dbdict.keys()) + + for pkg in (pkg for pkg in pkgs if pkg.name in dbset): + dbpkg = dbdict[pkg.name] + + # The odd select_for_update song and dance here are to ensure + # simultaneous updates don't happen on a package, causing + # files to be double-imported. 
with transaction.commit_on_success():
+            if not dbpkg.files_last_update or not dbpkg.last_update:
+                pass
+            elif dbpkg.files_last_update > dbpkg.last_update:
+                logger.debug("Files for %s are up to date", pkg.name)
+                continue
+            # TODO Django 1.4 select_for_update() will work once released
+            dbpkg = select_pkg_for_update(dbpkg)
+            logger.debug("Checking files for package %s", pkg.name)
+            populate_files(dbpkg, pkg, force=force)
 
-    logger.info('Finished updating Arch: %s', archname)
+    logger.info('Finished updating arch: %s', archname)
 
 
 def parse_info(iofile):
@@ -490,6 +505,8 @@ def read_repo(primary_arch, repo_file, options):
     """
     # always returns an Arch object, regardless of what is passed in
     primary_arch = locate_arch(primary_arch)
+    force = options.get('force', False)
+    filesonly = options.get('filesonly', False)
 
     repo, packages = parse_repo(repo_file)
 
@@ -509,7 +526,10 @@ def read_repo(primary_arch, repo_file, options):
 
     logger.info('Starting database updates for %s.', repo_file)
     for arch in sorted(packages_arches.keys()):
-        db_update(arch, repo, packages_arches[arch], options)
+        if filesonly:
+            filesonly_update(arch, repo, packages_arches[arch], force)
+        else:
+            db_update(arch, repo, packages_arches[arch], force)
     logger.info('Finished database updates for %s.', repo_file)
     return 0
-- cgit v1.2.3-2-g168b

From 4590196d79273c49172e2da74e7a7b31e59d7a27 Mon Sep 17 00:00:00 2001
From: Dan McGee
Date: Wed, 30 Nov 2011 14:07:35 -0600
Subject: Integrate master key into rest of site

Signed-off-by: Dan McGee
---
 devel/management/commands/generate_keyring.py | 4 ++++
 1 file changed, 4 insertions(+)
(limited to 'devel/management/commands')

diff --git a/devel/management/commands/generate_keyring.py b/devel/management/commands/generate_keyring.py
index 35ab8874..a3a764b4 100644
--- a/devel/management/commands/generate_keyring.py
+++ b/devel/management/commands/generate_keyring.py
@@ -13,6 +13,7 @@ import logging
 import subprocess
 import sys
 
+from devel.models import MasterKey
 from main.models import UserProfile
 
 logging.basicConfig(
@@ -48,11 +49,14 @@ def generate_keyring(keyserver, keyring):
         pgp_key__isnull=False).extra(where=["pgp_key != ''"]).values_list(
         "pgp_key", flat=True)
     logger.info("%d keys fetched from user profiles", len(key_ids))
+    master_key_ids = MasterKey.objects.values_list("pgp_key", flat=True)
+    logger.info("%d keys fetched from master keys", len(master_key_ids))
 
     gpg_cmd = ["gpg", "--no-default-keyring", "--keyring", keyring,
             "--keyserver", keyserver, "--recv-keys"]
     logger.info("running command: %r", gpg_cmd)
     gpg_cmd.extend(key_ids)
+    gpg_cmd.extend(master_key_ids)
     subprocess.check_call(gpg_cmd)
     logger.info("keyring at %s successfully updated", keyring)
-- cgit v1.2.3-2-g168b

From 7c84bea7dabdfbc307d373620b00214777d91a97 Mon Sep 17 00:00:00 2001
From: Dan McGee
Date: Wed, 30 Nov 2011 14:25:51 -0600
Subject: Allow generation of an ownertrust file

Signed-off-by: Dan McGee
---
 devel/management/commands/generate_keyring.py | 29 ++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)
(limited to 'devel/management/commands')

diff --git a/devel/management/commands/generate_keyring.py b/devel/management/commands/generate_keyring.py
index a3a764b4..062c738b 100644
--- a/devel/management/commands/generate_keyring.py
+++ b/devel/management/commands/generate_keyring.py
@@ -24,7 +24,7 @@ logging.basicConfig(
 logger = logging.getLogger()
 
 class Command(BaseCommand):
-    args = "<keyserver> <keyring_path>"
+    args = "<keyserver> <keyring_path> [ownertrust_path]"
     help = "Assemble a GPG keyring with all known developer keys."
def handle(self, *args, **options):
@@ -36,10 +36,14 @@ class Command(BaseCommand):
         elif v == 2:
             logger.level = logging.DEBUG
 
-        if len(args) != 2:
+        if len(args) < 2:
             raise CommandError("keyserver and keyring_path must be provided")
 
-        return generate_keyring(args[0], args[1])
+        generate_keyring(args[0], args[1])
+
+        if len(args) > 2:
+            generate_ownertrust(args[2])
+
 
 def generate_keyring(keyserver, keyring):
     logger.info("getting all known key IDs")
@@ -60,4 +64,23 @@ def generate_keyring(keyserver, keyring):
     subprocess.check_call(gpg_cmd)
     logger.info("keyring at %s successfully updated", keyring)
 
+
+TRUST_LEVELS = {
+    'unknown': 0,
+    'expired': 1,
+    'undefined': 2,
+    'never': 3,
+    'marginal': 4,
+    'fully': 5,
+    'ultimate': 6,
+}
+
+
+def generate_ownertrust(trust_path):
+    master_key_ids = MasterKey.objects.values_list("pgp_key", flat=True)
+    with open(trust_path, "w") as trustfile:
+        for key_id in master_key_ids:
+            trustfile.write("%s:%d:\n" % (key_id, TRUST_LEVELS['marginal']))
+    logger.info("trust file at %s created or overwritten", trust_path)
+
 # vim: set ts=4 sw=4 et:
-- cgit v1.2.3-2-g168b

From 4d02cd5b5d4437dd1543e2d45044db72da1989f4 Mon Sep 17 00:00:00 2001
From: Dan McGee
Date: Wed, 30 Nov 2011 23:56:12 -0600
Subject: reporead: fix undefined variables

Way to fail at refactoring, Dan.

Signed-off-by: Dan McGee
---
 devel/management/commands/reporead.py | 2 ++
 1 file changed, 2 insertions(+)
(limited to 'devel/management/commands')

diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py
index e4ba8580..c444538b 100644
--- a/devel/management/commands/reporead.py
+++ b/devel/management/commands/reporead.py
@@ -329,6 +329,8 @@ def db_update(archname, reponame, pkgs, force=False):
     """
     logger.info('Updating %s (%s)', reponame, archname)
     dbpkgs = update_common(archname, reponame, pkgs, True)
+    repository = Repo.objects.get(name__iexact=reponame)
+    architecture = Arch.objects.get(name__iexact=archname)
-- cgit v1.2.3-2-g168b