diff options
Diffstat (limited to 'devel/management/commands/reporead.py')
-rw-r--r-- | devel/management/commands/reporead.py | 176 |
1 files changed, 118 insertions, 58 deletions
diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 09e48559..a8875c7e 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -35,12 +35,11 @@ try: except ImportError: pass -from logging import ERROR, WARNING, INFO, DEBUG - from main.models import Arch, Package, PackageDepend, PackageFile, Repo +from packages.models import Conflict, Provision, Replacement logging.basicConfig( - level=WARNING, + level=logging.WARNING, format='%(asctime)s -> %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S', stream=sys.stderr) @@ -69,11 +68,11 @@ class Command(BaseCommand): v = int(options.get('verbosity', 0)) if v == 0: - logger.level = ERROR + logger.level = logging.ERROR elif v == 1: - logger.level = INFO + logger.level = logging.INFO elif v == 2: - logger.level = DEBUG + logger.level = logging.DEBUG import signal, traceback handler = lambda sig, stack: traceback.print_stack(stack) @@ -88,13 +87,23 @@ class Pkg(object): bare = ( 'name', 'base', 'arch', 'desc', 'filename', 'md5sum', 'url', 'builddate', 'packager' ) number = ( 'csize', 'isize' ) + collections = ( 'depends', 'optdepends', 'conflicts', + 'provides', 'replaces', 'groups', 'license', 'files' ) + + version_re = re.compile(r'^((\d+):)?(.+)-([^-]+)$') def __init__(self, repo): self.repo = repo self.ver = None self.rel = None + self.epoch = 0 for k in self.bare + self.number: setattr(self, k, None) + for k in self.collections: + setattr(self, k, ()) + # So we can tell the diffence between a package with no files, and a DB + # without files entries + self.has_files = False def populate(self, values): for k, v in values.iteritems(): @@ -103,16 +112,26 @@ class Pkg(object): setattr(self, k, v[0][:254]) elif k in self.number: setattr(self, k, long(v[0])) - elif k == 'force': - setattr(self, k, True) elif k == 'version': - ver, rel = v[0].rsplit('-') - setattr(self, 'ver', ver) - setattr(self, 'rel', rel) + match = self.version_re.match(v[0]) + self.ver = match.group(3) + self.rel = match.group(4) + if match.group(2): + self.epoch = int(match.group(2)) + elif k == 'files': + self.files = v + self.has_files = True else: - # files, depends, etc. + # anything left in collections setattr(self, k, v) + @property + def full_version(self): + '''Very similar to the main.models.Package method.''' + if self.epoch > 0: + return u'%d:%s-%s' % (self.epoch, self.ver, self.rel) + return u'%s-%s' % (self.ver, self.rel) + def find_user(userstring): ''' @@ -163,20 +182,58 @@ def find_user(userstring): # lookup more than strictly necessary. find_user.cache = {} +DEPEND_RE = re.compile(r"^(.+?)((>=|<=|=|>|<)(.*))?$") + def create_depend(package, dep_str, optional=False): depend = PackageDepend(pkg=package, optional=optional) # lop off any description first parts = dep_str.split(':', 1) if len(parts) > 1: depend.description = parts[1].strip() - match = re.match(r"^(.+?)((>=|<=|=|>|<)(.*))?$", parts[0].strip()) + match = DEPEND_RE.match(parts[0].strip()) if match: depend.depname = match.group(1) if match.group(2): depend.depvcmp = match.group(2) + else: + logger.warning('Package %s had unparsable depend string %s', + package.pkgname, dep_str) + return None depend.save(force_insert=True) return depend +def create_related(model, package, rel_str, equals_only=False): + related = model(pkg=package) + match = DEPEND_RE.match(rel_str) + if match: + related.name = match.group(1) + if match.group(3): + comp = match.group(3) + if not equals_only: + related.comparison = comp + elif comp != '=': + logger.warning( + 'Package %s had unexpected comparison operator %s for %s in %s', + package.pkgname, comp, model.__name__, rel_str) + if match.group(4): + related.version = match.group(4) + else: + logger.warning('Package %s had unparsable %s string %s', + package.pkgname, model.___name__, rel_str) + return None + related.save(force_insert=True) + return related + +def create_multivalued(dbpkg, repopkg, db_attr, repo_attr): + '''Populate the simplest of multivalued attributes. These are those that + only deal with a 'name' attribute, such as licenses, groups, etc. The input + and output objects and attribute names are specified, and everything is + done via getattr().''' + collection = getattr(dbpkg, db_attr) + collection.all().delete() + for name in getattr(repopkg, repo_attr): + collection.create(name=name) + def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): if repopkg.base: dbpkg.pkgbase = repopkg.base @@ -184,6 +241,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.pkgbase = repopkg.name dbpkg.pkgver = repopkg.ver dbpkg.pkgrel = repopkg.rel + dbpkg.epoch = repopkg.epoch dbpkg.pkgdesc = repopkg.desc dbpkg.url = repopkg.url dbpkg.filename = repopkg.filename @@ -210,38 +268,39 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): populate_files(dbpkg, repopkg, force=force) dbpkg.packagedepend_set.all().delete() - if hasattr(repopkg, 'depends'): - for y in repopkg.depends: - dep = create_depend(dbpkg, y) - if hasattr(repopkg, 'optdepends'): - for y in repopkg.optdepends: - dep = create_depend(dbpkg, y, True) + for y in repopkg.depends: + create_depend(dbpkg, y) + for y in repopkg.optdepends: + create_depend(dbpkg, y, True) - dbpkg.groups.all().delete() - if hasattr(repopkg, 'groups'): - for y in repopkg.groups: - dbpkg.groups.create(name=y) + dbpkg.conflicts.all().delete() + for y in repopkg.conflicts: + create_related(Conflict, dbpkg, y) + dbpkg.provides.all().delete() + for y in repopkg.provides: + create_related(Provision, dbpkg, y, equals_only=True) + dbpkg.replaces.all().delete() + for y in repopkg.replaces: + create_related(Replacement, dbpkg, y) - dbpkg.licenses.all().delete() - if hasattr(repopkg, 'license'): - for y in repopkg.license: - dbpkg.licenses.create(name=y) + create_multivalued(dbpkg, repopkg, 'groups', 'groups') + create_multivalued(dbpkg, repopkg, 'licenses', 'license') def populate_files(dbpkg, repopkg, force=False): if not force: - if dbpkg.pkgver != repopkg.ver or dbpkg.pkgrel != repopkg.rel: - logger.info("db version (%s-%s) didn't match repo version (%s-%s) " - "for package %s, skipping file list addition", - dbpkg.pkgver, dbpkg.pkgrel, repopkg.ver, repopkg.rel, - dbpkg.pkgname) + if dbpkg.pkgver != repopkg.ver or dbpkg.pkgrel != repopkg.rel \ + or dbpkg.epoch != repopkg.epoch: + logger.info("DB version (%s) didn't match repo version " + "(%s) for package %s, skipping file list addition", + dbpkg.full_version, repopkg.full_version, dbpkg.pkgname) return if not dbpkg.files_last_update or not dbpkg.last_update: pass elif dbpkg.files_last_update > dbpkg.last_update: return # only delete files if we are reading a DB that contains them - if hasattr(repopkg, 'files'): + if repopkg.has_files: dbpkg.packagefile_set.all().delete() logger.info("adding %d files for package %s", len(repopkg.files), dbpkg.pkgname) @@ -255,8 +314,8 @@ def populate_files(dbpkg, repopkg, force=False): is_directory=(filename is None), directory=dirname + '/', filename=filename) - pkgfile.save() - dbpkg.files_last_update = datetime.now() + pkgfile.save(force_insert=True) + dbpkg.files_last_update = datetime.utcnow() dbpkg.save() @transaction.commit_on_success @@ -273,25 +332,23 @@ def db_update(archname, reponame, pkgs, options): filesonly = options.get('filesonly', False) repository = Repo.objects.get(name__iexact=reponame) architecture = Arch.objects.get(name__iexact=archname) - dbpkgs = Package.objects.filter(arch=architecture, repo=repository) - # It makes sense to fully evaluate our DB query now because we will - # be using 99% of the objects in our "in both sets" loop. Force eval - # by calling list() on the QuerySet. - list(dbpkgs) + # no-arg order_by() removes even the default ordering; we don't need it + dbpkgs = Package.objects.filter( + arch=architecture, repo=repository).order_by() # This makes our inner loop where we find packages by name *way* more # efficient by not having to go to the database for each package to # SELECT them by name. dbdict = dict([(pkg.pkgname, pkg) for pkg in dbpkgs]) logger.debug("Creating sets") - dbset = set([pkg.pkgname for pkg in dbpkgs]) + dbset = set(dbdict.keys()) syncset = set([pkg.name for pkg in pkgs]) logger.info("%d packages in current web DB", len(dbset)) logger.info("%d packages in new updating db", len(syncset)) in_sync_not_db = syncset - dbset logger.info("%d packages in sync not db", len(in_sync_not_db)) - # Try to catch those random orphaning issues that make Eric so unhappy. + # Try to catch those random package deletions that make Eric so unhappy. if len(dbset): dbpercent = 100.0 * len(syncset) / len(dbset) else: @@ -302,12 +359,14 @@ def db_update(archname, reponame, pkgs, options): # means we expect the repo to fluctuate a lot. msg = "Package database has %.1f%% the number of packages in the " \ "web database" % dbpercent - if not filesonly and \ + if len(dbset) == 0 and len(syncset) == 0: + pass + elif not filesonly and \ len(dbset) > 20 and dbpercent < 50.0 and \ - not repository.testing: + not repository.testing and not repository.staging: logger.error(msg) raise Exception(msg) - if dbpercent < 75.0: + elif dbpercent < 75.0: logger.warning(msg) if not filesonly: @@ -315,14 +374,14 @@ def db_update(archname, reponame, pkgs, options): for p in [x for x in pkgs if x.name in in_sync_not_db]: logger.info("Adding package %s", p.name) pkg = Package(pkgname = p.name, arch = architecture, repo = repository) - populate_pkg(pkg, p, timestamp=datetime.now()) + populate_pkg(pkg, p, timestamp=datetime.utcnow()) # packages in database and not in syncdb (remove from database) in_db_not_sync = dbset - syncset for p in in_db_not_sync: logger.info("Removing package %s from database", p) - Package.objects.get( - pkgname=p, arch=architecture, repo=repository).delete() + dbp = dbdict[p] + dbp.delete() # packages in both database and in syncdb (update in database) pkg_in_both = syncset & dbset @@ -334,11 +393,12 @@ def db_update(archname, reponame, pkgs, options): # for a non-force, we don't want to do anything at all. if filesonly: pass - elif p.ver == dbp.pkgver and p.rel == dbp.pkgrel: + elif p.ver == dbp.pkgver and p.rel == dbp.pkgrel \ + and p.epoch == dbp.epoch: if not force: continue else: - timestamp = datetime.now() + timestamp = datetime.utcnow() if filesonly: logger.debug("Checking files for package %s in database", p.name) populate_files(dbp, p, force=force) @@ -421,10 +481,9 @@ def parse_repo(repopath): logger.info("Finished repo parsing, %d total packages", len(pkgs)) return (reponame, pkgs.values()) -def validate_arch(arch): +def validate_arch(archname): "Check if arch is valid." - available_arches = [x.name for x in Arch.objects.all()] - return arch in available_arches + return Arch.objects.filter(name__iexact=archname).exists() def read_repo(primary_arch, repo_file, options): """ @@ -432,21 +491,22 @@ def read_repo(primary_arch, repo_file, options): """ repo, packages = parse_repo(repo_file) - # sort packages by arch -- to handle noarch stuff + # group packages by arch -- to handle noarch stuff packages_arches = {} - packages_arches['any'] = [] + for arch in Arch.objects.filter(agnostic=True): + packages_arches[arch.name] = [] packages_arches[primary_arch] = [] for package in packages: - if package.arch in ('any', primary_arch): + if package.arch in packages_arches: packages_arches[package.arch].append(package) else: # we don't include mis-arched packages logger.warning("Package %s arch = %s", package.name,package.arch) logger.info('Starting database updates.') - for (arch, pkgs) in packages_arches.items(): - db_update(arch, repo, pkgs, options) + for arch in sorted(packages_arches.keys()): + db_update(arch, repo, packages_arches[arch], options) logger.info('Finished database updates.') return 0 |