summary | refs | log | tree | commit | diff
path: root/devel/management/commands/reporead.py
diff options
context:
space:
mode:
author Parabola <dev@list.parabolagnulinux.org> 2011-05-21 05:41:26 +0000
committer Parabola <dev@list.parabolagnulinux.org> 2011-05-21 05:41:26 +0000
commit 62059d83aea71ac7bde8902b20221e52c86a810b (patch)
tree 758bd028d43a30893ba6261f39b1ebdbebbbf6e3 /devel/management/commands/reporead.py
parent 081223981aa520f792757a1776588756a4107fd4 (diff)
parent fd9ecb6eb1c8ee56adfbb58640d7a98baa6cd62c (diff)
Merge branch 'master' of /srv/git/projects/parabolaweb
Diffstat (limited to 'devel/management/commands/reporead.py')
-rw-r--r-- devel/management/commands/reporead.py 176
1 files changed, 118 insertions, 58 deletions
diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py
index 09e48559..a8875c7e 100644
--- a/devel/management/commands/reporead.py
+++ b/devel/management/commands/reporead.py
@@ -35,12 +35,11 @@ try:
except ImportError:
pass
-from logging import ERROR, WARNING, INFO, DEBUG
-
from main.models import Arch, Package, PackageDepend, PackageFile, Repo
+from packages.models import Conflict, Provision, Replacement
logging.basicConfig(
- level=WARNING,
+ level=logging.WARNING,
format='%(asctime)s -> %(levelname)s: %(message)s',
datefmt='%Y-%m-%d %H:%M:%S',
stream=sys.stderr)
@@ -69,11 +68,11 @@ class Command(BaseCommand):
v = int(options.get('verbosity', 0))
if v == 0:
- logger.level = ERROR
+ logger.level = logging.ERROR
elif v == 1:
- logger.level = INFO
+ logger.level = logging.INFO
elif v == 2:
- logger.level = DEBUG
+ logger.level = logging.DEBUG
import signal, traceback
handler = lambda sig, stack: traceback.print_stack(stack)
@@ -88,13 +87,23 @@ class Pkg(object):
bare = ( 'name', 'base', 'arch', 'desc', 'filename',
'md5sum', 'url', 'builddate', 'packager' )
number = ( 'csize', 'isize' )
+ collections = ( 'depends', 'optdepends', 'conflicts',
+ 'provides', 'replaces', 'groups', 'license', 'files' )
+
+ version_re = re.compile(r'^((\d+):)?(.+)-([^-]+)$')
def __init__(self, repo):
self.repo = repo
self.ver = None
self.rel = None
+ self.epoch = 0
for k in self.bare + self.number:
setattr(self, k, None)
+ for k in self.collections:
+ setattr(self, k, ())
+ # So we can tell the difference between a package with no files, and a DB
+ # without files entries
+ self.has_files = False
def populate(self, values):
for k, v in values.iteritems():
@@ -103,16 +112,26 @@ class Pkg(object):
setattr(self, k, v[0][:254])
elif k in self.number:
setattr(self, k, long(v[0]))
- elif k == 'force':
- setattr(self, k, True)
elif k == 'version':
- ver, rel = v[0].rsplit('-')
- setattr(self, 'ver', ver)
- setattr(self, 'rel', rel)
+ match = self.version_re.match(v[0])
+ self.ver = match.group(3)
+ self.rel = match.group(4)
+ if match.group(2):
+ self.epoch = int(match.group(2))
+ elif k == 'files':
+ self.files = v
+ self.has_files = True
else:
- # files, depends, etc.
+ # anything left in collections
setattr(self, k, v)
+ @property
+ def full_version(self):
+ '''Very similar to the main.models.Package method.'''
+ if self.epoch > 0:
+ return u'%d:%s-%s' % (self.epoch, self.ver, self.rel)
+ return u'%s-%s' % (self.ver, self.rel)
+
def find_user(userstring):
'''
@@ -163,20 +182,58 @@ def find_user(userstring):
# lookup more than strictly necessary.
find_user.cache = {}
+DEPEND_RE = re.compile(r"^(.+?)((>=|<=|=|>|<)(.*))?$")
+
def create_depend(package, dep_str, optional=False):
depend = PackageDepend(pkg=package, optional=optional)
# lop off any description first
parts = dep_str.split(':', 1)
if len(parts) > 1:
depend.description = parts[1].strip()
- match = re.match(r"^(.+?)((>=|<=|=|>|<)(.*))?$", parts[0].strip())
+ match = DEPEND_RE.match(parts[0].strip())
if match:
depend.depname = match.group(1)
if match.group(2):
depend.depvcmp = match.group(2)
+ else:
+ logger.warning('Package %s had unparsable depend string %s',
+ package.pkgname, dep_str)
+ return None
depend.save(force_insert=True)
return depend
+def create_related(model, package, rel_str, equals_only=False):
+ related = model(pkg=package)
+ match = DEPEND_RE.match(rel_str)
+ if match:
+ related.name = match.group(1)
+ if match.group(3):
+ comp = match.group(3)
+ if not equals_only:
+ related.comparison = comp
+ elif comp != '=':
+ logger.warning(
+ 'Package %s had unexpected comparison operator %s for %s in %s',
+ package.pkgname, comp, model.__name__, rel_str)
+ if match.group(4):
+ related.version = match.group(4)
+ else:
+ logger.warning('Package %s had unparsable %s string %s',
+ package.pkgname, model.__name__, rel_str)
+ return None
+ related.save(force_insert=True)
+ return related
+
+def create_multivalued(dbpkg, repopkg, db_attr, repo_attr):
+ '''Populate the simplest of multivalued attributes. These are those that
+ only deal with a 'name' attribute, such as licenses, groups, etc. The input
+ and output objects and attribute names are specified, and everything is
+ done via getattr().'''
+ collection = getattr(dbpkg, db_attr)
+ collection.all().delete()
+ for name in getattr(repopkg, repo_attr):
+ collection.create(name=name)
+
def populate_pkg(dbpkg, repopkg, force=False, timestamp=None):
if repopkg.base:
dbpkg.pkgbase = repopkg.base
@@ -184,6 +241,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None):
dbpkg.pkgbase = repopkg.name
dbpkg.pkgver = repopkg.ver
dbpkg.pkgrel = repopkg.rel
+ dbpkg.epoch = repopkg.epoch
dbpkg.pkgdesc = repopkg.desc
dbpkg.url = repopkg.url
dbpkg.filename = repopkg.filename
@@ -210,38 +268,39 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None):
populate_files(dbpkg, repopkg, force=force)
dbpkg.packagedepend_set.all().delete()
- if hasattr(repopkg, 'depends'):
- for y in repopkg.depends:
- dep = create_depend(dbpkg, y)
- if hasattr(repopkg, 'optdepends'):
- for y in repopkg.optdepends:
- dep = create_depend(dbpkg, y, True)
+ for y in repopkg.depends:
+ create_depend(dbpkg, y)
+ for y in repopkg.optdepends:
+ create_depend(dbpkg, y, True)
- dbpkg.groups.all().delete()
- if hasattr(repopkg, 'groups'):
- for y in repopkg.groups:
- dbpkg.groups.create(name=y)
+ dbpkg.conflicts.all().delete()
+ for y in repopkg.conflicts:
+ create_related(Conflict, dbpkg, y)
+ dbpkg.provides.all().delete()
+ for y in repopkg.provides:
+ create_related(Provision, dbpkg, y, equals_only=True)
+ dbpkg.replaces.all().delete()
+ for y in repopkg.replaces:
+ create_related(Replacement, dbpkg, y)
- dbpkg.licenses.all().delete()
- if hasattr(repopkg, 'license'):
- for y in repopkg.license:
- dbpkg.licenses.create(name=y)
+ create_multivalued(dbpkg, repopkg, 'groups', 'groups')
+ create_multivalued(dbpkg, repopkg, 'licenses', 'license')
def populate_files(dbpkg, repopkg, force=False):
if not force:
- if dbpkg.pkgver != repopkg.ver or dbpkg.pkgrel != repopkg.rel:
- logger.info("db version (%s-%s) didn't match repo version (%s-%s) "
- "for package %s, skipping file list addition",
- dbpkg.pkgver, dbpkg.pkgrel, repopkg.ver, repopkg.rel,
- dbpkg.pkgname)
+ if dbpkg.pkgver != repopkg.ver or dbpkg.pkgrel != repopkg.rel \
+ or dbpkg.epoch != repopkg.epoch:
+ logger.info("DB version (%s) didn't match repo version "
+ "(%s) for package %s, skipping file list addition",
+ dbpkg.full_version, repopkg.full_version, dbpkg.pkgname)
return
if not dbpkg.files_last_update or not dbpkg.last_update:
pass
elif dbpkg.files_last_update > dbpkg.last_update:
return
# only delete files if we are reading a DB that contains them
- if hasattr(repopkg, 'files'):
+ if repopkg.has_files:
dbpkg.packagefile_set.all().delete()
logger.info("adding %d files for package %s",
len(repopkg.files), dbpkg.pkgname)
@@ -255,8 +314,8 @@ def populate_files(dbpkg, repopkg, force=False):
is_directory=(filename is None),
directory=dirname + '/',
filename=filename)
- pkgfile.save()
- dbpkg.files_last_update = datetime.now()
+ pkgfile.save(force_insert=True)
+ dbpkg.files_last_update = datetime.utcnow()
dbpkg.save()
@transaction.commit_on_success
@@ -273,25 +332,23 @@ def db_update(archname, reponame, pkgs, options):
filesonly = options.get('filesonly', False)
repository = Repo.objects.get(name__iexact=reponame)
architecture = Arch.objects.get(name__iexact=archname)
- dbpkgs = Package.objects.filter(arch=architecture, repo=repository)
- # It makes sense to fully evaluate our DB query now because we will
- # be using 99% of the objects in our "in both sets" loop. Force eval
- # by calling list() on the QuerySet.
- list(dbpkgs)
+ # no-arg order_by() removes even the default ordering; we don't need it
+ dbpkgs = Package.objects.filter(
+ arch=architecture, repo=repository).order_by()
# This makes our inner loop where we find packages by name *way* more
# efficient by not having to go to the database for each package to
# SELECT them by name.
dbdict = dict([(pkg.pkgname, pkg) for pkg in dbpkgs])
logger.debug("Creating sets")
- dbset = set([pkg.pkgname for pkg in dbpkgs])
+ dbset = set(dbdict.keys())
syncset = set([pkg.name for pkg in pkgs])
logger.info("%d packages in current web DB", len(dbset))
logger.info("%d packages in new updating db", len(syncset))
in_sync_not_db = syncset - dbset
logger.info("%d packages in sync not db", len(in_sync_not_db))
- # Try to catch those random orphaning issues that make Eric so unhappy.
+ # Try to catch those random package deletions that make Eric so unhappy.
if len(dbset):
dbpercent = 100.0 * len(syncset) / len(dbset)
else:
@@ -302,12 +359,14 @@ def db_update(archname, reponame, pkgs, options):
# means we expect the repo to fluctuate a lot.
msg = "Package database has %.1f%% the number of packages in the " \
"web database" % dbpercent
- if not filesonly and \
+ if len(dbset) == 0 and len(syncset) == 0:
+ pass
+ elif not filesonly and \
len(dbset) > 20 and dbpercent < 50.0 and \
- not repository.testing:
+ not repository.testing and not repository.staging:
logger.error(msg)
raise Exception(msg)
- if dbpercent < 75.0:
+ elif dbpercent < 75.0:
logger.warning(msg)
if not filesonly:
@@ -315,14 +374,14 @@ def db_update(archname, reponame, pkgs, options):
for p in [x for x in pkgs if x.name in in_sync_not_db]:
logger.info("Adding package %s", p.name)
pkg = Package(pkgname = p.name, arch = architecture, repo = repository)
- populate_pkg(pkg, p, timestamp=datetime.now())
+ populate_pkg(pkg, p, timestamp=datetime.utcnow())
# packages in database and not in syncdb (remove from database)
in_db_not_sync = dbset - syncset
for p in in_db_not_sync:
logger.info("Removing package %s from database", p)
- Package.objects.get(
- pkgname=p, arch=architecture, repo=repository).delete()
+ dbp = dbdict[p]
+ dbp.delete()
# packages in both database and in syncdb (update in database)
pkg_in_both = syncset & dbset
@@ -334,11 +393,12 @@ def db_update(archname, reponame, pkgs, options):
# for a non-force, we don't want to do anything at all.
if filesonly:
pass
- elif p.ver == dbp.pkgver and p.rel == dbp.pkgrel:
+ elif p.ver == dbp.pkgver and p.rel == dbp.pkgrel \
+ and p.epoch == dbp.epoch:
if not force:
continue
else:
- timestamp = datetime.now()
+ timestamp = datetime.utcnow()
if filesonly:
logger.debug("Checking files for package %s in database", p.name)
populate_files(dbp, p, force=force)
@@ -421,10 +481,9 @@ def parse_repo(repopath):
logger.info("Finished repo parsing, %d total packages", len(pkgs))
return (reponame, pkgs.values())
-def validate_arch(arch):
+def validate_arch(archname):
"Check if arch is valid."
- available_arches = [x.name for x in Arch.objects.all()]
- return arch in available_arches
+ return Arch.objects.filter(name__iexact=archname).exists()
def read_repo(primary_arch, repo_file, options):
"""
@@ -432,21 +491,22 @@ def read_repo(primary_arch, repo_file, options):
"""
repo, packages = parse_repo(repo_file)
- # sort packages by arch -- to handle noarch stuff
+ # group packages by arch -- to handle noarch stuff
packages_arches = {}
- packages_arches['any'] = []
+ for arch in Arch.objects.filter(agnostic=True):
+ packages_arches[arch.name] = []
packages_arches[primary_arch] = []
for package in packages:
- if package.arch in ('any', primary_arch):
+ if package.arch in packages_arches:
packages_arches[package.arch].append(package)
else:
# we don't include mis-arched packages
logger.warning("Package %s arch = %s",
package.name,package.arch)
logger.info('Starting database updates.')
- for (arch, pkgs) in packages_arches.items():
- db_update(arch, repo, pkgs, options)
+ for arch in sorted(packages_arches.keys()):
+ db_update(arch, repo, packages_arches[arch], options)
logger.info('Finished database updates.')
return 0