From 72a92102df4999dbcc370064707c9026d51c4fe7 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 18 May 2012 21:29:03 -0500 Subject: Switch to usage of new Depend object Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index fd8e3979..47294d9a 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -29,9 +29,9 @@ from django.db import connections, router, transaction from django.db.utils import IntegrityError from devel.utils import UserFinder -from main.models import Arch, Package, PackageDepend, PackageFile, Repo +from main.models import Arch, Package, PackageFile, Repo from main.utils import utc_now -from packages.models import Conflict, Provision, Replacement +from packages.models import Depend, Conflict, Provision, Replacement logging.basicConfig( @@ -141,19 +141,21 @@ class RepoPackage(object): return u'%s-%s' % (self.ver, self.rel) -DEPEND_RE = re.compile(r"^(.+?)((>=|<=|=|>|<)(.*))?$") +DEPEND_RE = re.compile(r"^(.+?)((>=|<=|=|>|<)(.+))?$") def create_depend(package, dep_str, optional=False): - depend = PackageDepend(pkg=package, optional=optional) + depend = Depend(pkg=package, optional=optional) # lop off any description first parts = dep_str.split(':', 1) if len(parts) > 1: depend.description = parts[1].strip() match = DEPEND_RE.match(parts[0].strip()) if match: - depend.depname = match.group(1) - if match.group(2): - depend.depvcmp = match.group(2) + depend.name = match.group(1) + if match.group(3): + depend.comparison = match.group(3) + if match.group(4): + related.version = match.group(4) else: logger.warning('Package %s had unparsable depend string %s', package.pkgname, dep_str) @@ -232,7 +234,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.depends.all().delete() deps = [create_depend(dbpkg, y) for y in repopkg.depends] deps += [create_depend(dbpkg, y, True) for y in repopkg.optdepends] - PackageDepend.objects.bulk_create(deps) + Depend.objects.bulk_create(deps) dbpkg.conflicts.all().delete() conflicts = [create_related(Conflict, dbpkg, y) for y in repopkg.conflicts] -- cgit v1.2.3-2-g168b From 3f9aeb45c2a2b498a389bacc92b5f56d9feb4329 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sat, 19 May 2012 09:54:15 -0500 Subject: reporead: fix copy/paste issue Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 47294d9a..2e8c4625 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -155,7 +155,7 @@ def create_depend(package, dep_str, optional=False): if match.group(3): depend.comparison = match.group(3) if match.group(4): - related.version = match.group(4) + depend.version = match.group(4) else: logger.warning('Package %s had unparsable depend string %s', package.pkgname, dep_str) -- cgit v1.2.3-2-g168b From a87fe016d1a1bf7fdcd2b19f515aa72a5b93db2b Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 1 Jul 2012 20:21:34 -0500 Subject: Log package updates during reporead invocation This adds a Manager and log_update method to help log all updates made to the packages table during reporead runs. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 2e8c4625..4e242af1 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -14,6 +14,7 @@ Example: """ from collections import defaultdict +from copy import copy import io import os import re @@ -31,7 +32,7 @@ from django.db.utils import IntegrityError from devel.utils import UserFinder from main.models import Arch, Package, PackageFile, Repo from main.utils import utc_now -from packages.models import Depend, Conflict, Provision, Replacement +from packages.models import Depend, Conflict, Provision, Replacement, Update logging.basicConfig( @@ -362,6 +363,7 @@ def db_update(archname, reponame, pkgs, force=False): try: with transaction.commit_on_success(): populate_pkg(dbpkg, pkg, timestamp=utc_now()) + Update.objects.log_update(None, dbpkg) except IntegrityError: logger.warning("Could not add package %s; " "not fatal if another thread beat us to it.", @@ -372,6 +374,7 @@ def db_update(archname, reponame, pkgs, force=False): logger.info("Removing package %s", pkgname) dbpkg = dbdict[pkgname] with transaction.commit_on_success(): + Update.objects.log_update(dbpkg, None) # no race condition here as long as simultaneous threads both # issue deletes; second delete will be a no-op delete_pkg_files(dbpkg) @@ -399,7 +402,9 @@ def db_update(archname, reponame, pkgs, force=False): logger.debug("Package %s was already updated", pkg.name) continue logger.info("Updating package %s", pkg.name) + prevpkg = copy(dbpkg) populate_pkg(dbpkg, pkg, force=force, timestamp=timestamp) + Update.objects.log_update(prevpkg, dbpkg) logger.info('Finished updating arch: %s', archname) -- cgit v1.2.3-2-g168b From daf011b67a338f26ead8058a9f9caedfe251c62c Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 5 Jul 2012 11:25:00 -0400 Subject: reporead: properly handle cases where last_update == files_last_update We should assume the filelists are up to date in this case, not out of date. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 4e242af1..51c73c02 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -273,7 +273,7 @@ def populate_files(dbpkg, repopkg, force=False): return if not dbpkg.files_last_update or not dbpkg.last_update: pass - elif dbpkg.files_last_update > dbpkg.last_update: + elif dbpkg.files_last_update >= dbpkg.last_update: return # only delete files if we are reading a DB that contains them @@ -427,7 +427,7 @@ def filesonly_update(archname, reponame, pkgs, force=False): with transaction.commit_on_success(): if not dbpkg.files_last_update or not dbpkg.last_update: pass - elif not force and dbpkg.files_last_update > dbpkg.last_update: + elif not force and dbpkg.files_last_update >= dbpkg.last_update: logger.debug("Files for %s are up to date", pkg.name) continue dbpkg = Package.objects.select_for_update().get(id=dbpkg.id) -- cgit v1.2.3-2-g168b From 9d91cad678133e97345111fab2c103fcda9b9f28 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 5 Jul 2012 11:25:40 -0400 Subject: reporead: handle files in root directory properly Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 51c73c02..df29a8a7 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -283,7 +283,10 @@ def populate_files(dbpkg, repopkg, force=False): len(repopkg.files), dbpkg.pkgname) pkg_files = [] for f in repopkg.files: - dirname, filename = f.rsplit('/', 1) + if '/' in f: + dirname, filename = f.rsplit('/', 1) + else: + dirname, filename = '', f if filename == '': filename = None pkgfile = PackageFile(pkg=dbpkg, -- cgit v1.2.3-2-g168b From 909cb9a209b4a4db00232b3a62656f95c4b88d45 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 5 Jul 2012 17:09:55 -0500 Subject: reporead: don't append slash to empty (root) directory Add the slash only if we have a directory name, and not otherwise. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index df29a8a7..43578d4a 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -285,13 +285,14 @@ def populate_files(dbpkg, repopkg, force=False): for f in repopkg.files: if '/' in f: dirname, filename = f.rsplit('/', 1) + dirname += '/' else: dirname, filename = '', f if filename == '': filename = None pkgfile = PackageFile(pkg=dbpkg, is_directory=(filename is None), - directory=dirname + '/', + directory=dirname, filename=filename) pkg_files.append(pkgfile) PackageFile.objects.bulk_create(pkg_files) -- cgit v1.2.3-2-g168b From a1ec14fc68282d67c00c79b5aa6aab60461f056a Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Mon, 12 Mar 2012 13:14:49 -0400 Subject: reporead: disable FULL synchronous writes for sqlite3 At least on Linux, we hit a huge bottleneck waiting for the FULL commit to happen for each added package during reporead operations. It makes much more sense to back this off to FULL level instead, which trades some possible loss of durability for speedier operation. Additionally, no one would possibly be running their production version of this site on sqlite3, right? Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 43578d4a..e50686b1 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -547,6 +547,12 @@ def read_repo(primary_arch, repo_file, options): package.name, repo_file, package.arch)) del packages + database = router.db_for_write(Package) + connection = connections[database] + if connection.vendor == 'sqlite': + cursor = connection.cursor() + cursor.execute('PRAGMA synchronous = NORMAL') + logger.info('Starting database updates for %s.', repo_file) for arch in sorted(packages_arches.keys()): if filesonly: -- cgit v1.2.3-2-g168b From 88ee61a39ac3690267f2b7903f3646972e8f055d Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 8 Jul 2012 21:24:48 -0500 Subject: Work around bulk_create limitations in sqlite3 in reporead Given the 999 SQL statement variable limit, we can easily hit it when updating a package with thousands of files or a few hundred depends. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index e50686b1..2d9b68b2 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -31,7 +31,7 @@ from django.db.utils import IntegrityError from devel.utils import UserFinder from main.models import Arch, Package, PackageFile, Repo -from main.utils import utc_now +from main.utils import utc_now, database_vendor from packages.models import Depend, Conflict, Provision, Replacement, Update @@ -184,6 +184,28 @@ def create_related(model, package, rel_str, equals_only=False): return None return related + +def batched_bulk_create(model, all_objects): + # for short lists, just bulk_create as we should be fine + if len(all_objects) < 20: + return model.objects.bulk_create(all_objects) + + if database_vendor(model, mode='write') == 'sqlite': + # 999 max variables in each SQL statement + incr = 999 // len(model._meta.fields) + else: + incr = 1000 + + def chunks(): + offset = 0 + while offset < len(all_objects): + yield all_objects[offset:offset + incr] + offset += incr + + for items in chunks(): + model.objects.bulk_create(items) + + def create_multivalued(dbpkg, repopkg, db_attr, repo_attr): '''Populate the simplest of multivalued attributes. These are those that only deal with a 'name' attribute, such as licenses, groups, etc. The input @@ -235,20 +257,20 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.depends.all().delete() deps = [create_depend(dbpkg, y) for y in repopkg.depends] deps += [create_depend(dbpkg, y, True) for y in repopkg.optdepends] - Depend.objects.bulk_create(deps) + batched_bulk_create(Depend, deps) dbpkg.conflicts.all().delete() conflicts = [create_related(Conflict, dbpkg, y) for y in repopkg.conflicts] - Conflict.objects.bulk_create(conflicts) + batched_bulk_create(Conflict, conflicts) dbpkg.provides.all().delete() provides = [create_related(Provision, dbpkg, y, equals_only=True) for y in repopkg.provides] - Provision.objects.bulk_create(provides) + batched_bulk_create(Provision, provides) dbpkg.replaces.all().delete() replaces = [create_related(Replacement, dbpkg, y) for y in repopkg.replaces] - Replacement.objects.bulk_create(replaces) + batched_bulk_create(Replacement, replaces) create_multivalued(dbpkg, repopkg, 'groups', 'groups') create_multivalued(dbpkg, repopkg, 'licenses', 'license') @@ -295,7 +317,7 @@ def populate_files(dbpkg, repopkg, force=False): directory=dirname, filename=filename) pkg_files.append(pkgfile) - PackageFile.objects.bulk_create(pkg_files) + batched_bulk_create(PackageFile, pkg_files) dbpkg.files_last_update = utc_now() dbpkg.save() -- cgit v1.2.3-2-g168b From c0bf9e20660cfae7ea8994472555bba23398b598 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 24 Jul 2012 09:19:48 -0500 Subject: Remove custom utc_now() function, use django.utils.timezone.now() This was around from the time when we handled timezones sanely and Django did not; now that we are on 1.4 we no longer need our own code to handle this. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 2d9b68b2..e69691db 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -28,10 +28,11 @@ from pytz import utc from django.core.management.base import BaseCommand, CommandError from django.db import connections, router, transaction from django.db.utils import IntegrityError +from django.utils.timezone import now from devel.utils import UserFinder from main.models import Arch, Package, PackageFile, Repo -from main.utils import utc_now, database_vendor +from main.utils import database_vendor from packages.models import Depend, Conflict, Provision, Replacement, Update @@ -318,7 +319,7 @@ def populate_files(dbpkg, repopkg, force=False): filename=filename) pkg_files.append(pkgfile) batched_bulk_create(PackageFile, pkg_files) - dbpkg.files_last_update = utc_now() + dbpkg.files_last_update = now() dbpkg.save() @@ -388,7 +389,7 @@ def db_update(archname, reponame, pkgs, force=False): dbpkg = Package(pkgname=pkg.name, arch=architecture, repo=repository) try: with transaction.commit_on_success(): - populate_pkg(dbpkg, pkg, timestamp=utc_now()) + populate_pkg(dbpkg, pkg, timestamp=now()) Update.objects.log_update(None, dbpkg) except IntegrityError: logger.warning("Could not add package %s; " @@ -417,7 +418,7 @@ def db_update(archname, reponame, pkgs, force=False): if not force and pkg_same_version(pkg, dbpkg): continue elif not force: - timestamp = utc_now() + timestamp = now() # The odd select_for_update song and dance here are to ensure # simultaneous updates don't happen on a package, causing -- cgit v1.2.3-2-g168b From 280c53eec5661252b5692fa374292c4d421e3bd8 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sat, 28 Jul 2012 11:45:15 -0500 Subject: reporead: don't use iexact lookup on arch name We don't do this anywhere else, so we shouldn't do this here either. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index e69691db..aaa9812e 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -332,7 +332,7 @@ def update_common(archname, reponame, pkgs, sanity_check=True): transaction.set_dirty() repository = Repo.objects.get(name__iexact=reponame) - architecture = Arch.objects.get(name__iexact=archname) + architecture = Arch.objects.get(name=archname) # no-arg order_by() removes even the default ordering; we don't need it dbpkgs = Package.objects.filter( arch=architecture, repo=repository).order_by() @@ -371,7 +371,7 @@ def db_update(archname, reponame, pkgs, force=False): logger.info('Updating %s (%s)', reponame, archname) dbpkgs = update_common(archname, reponame, pkgs, True) repository = Repo.objects.get(name__iexact=reponame) - architecture = Arch.objects.get(name__iexact=archname) + architecture = Arch.objects.get(name=archname) # This makes our inner loop where we find packages by name *way* more # efficient by not having to go to the database for each package to @@ -538,7 +538,7 @@ def locate_arch(arch): if isinstance(arch, Arch): return arch try: - return Arch.objects.get(name__iexact=arch) + return Arch.objects.get(name=arch) except Arch.DoesNotExist: raise CommandError( 'Specified architecture %s is not currently known.' % arch) -- cgit v1.2.3-2-g168b From a64bbbd4139d91cbbca10d804067cbd87a95872d Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 31 Jul 2012 20:27:43 -0500 Subject: Make adjustments for optional -> deptype conversion Very little dealt directly with this field. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index aaa9812e..a3bf3e0c 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -145,8 +145,8 @@ class RepoPackage(object): DEPEND_RE = re.compile(r"^(.+?)((>=|<=|=|>|<)(.+))?$") -def create_depend(package, dep_str, optional=False): - depend = Depend(pkg=package, optional=optional) +def create_depend(package, dep_str, deptype='D'): + depend = Depend(pkg=package, deptype=deptype) # lop off any description first parts = dep_str.split(':', 1) if len(parts) > 1: @@ -257,7 +257,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.depends.all().delete() deps = [create_depend(dbpkg, y) for y in repopkg.depends] - deps += [create_depend(dbpkg, y, True) for y in repopkg.optdepends] + deps += [create_depend(dbpkg, y, 'O') for y in repopkg.optdepends] batched_bulk_create(Depend, deps) dbpkg.conflicts.all().delete() -- cgit v1.2.3-2-g168b From 1f2466fffceafebfaca34e3ed2d34de6b622768b Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 31 Jul 2012 20:35:50 -0500 Subject: reporead: import make and check depends We don't have these in the database yet, but future verisons of repo-add will put this information in the sync databases. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index a3bf3e0c..8b55b09a 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -80,8 +80,9 @@ class RepoPackage(object): bare = ( 'name', 'base', 'arch', 'filename', 'md5sum', 'sha256sum', 'url', 'packager' ) number = ( 'csize', 'isize' ) - collections = ( 'depends', 'optdepends', 'conflicts', - 'provides', 'replaces', 'groups', 'license', 'files' ) + collections = ( 'depends', 'optdepends', 'makedepends', 'checkdepends', + 'conflicts', 'provides', 'replaces', 'groups', 'license', + 'files' ) version_re = re.compile(r'^((\d+):)?(.+)-([^-]+)$') @@ -258,6 +259,8 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.depends.all().delete() deps = [create_depend(dbpkg, y) for y in repopkg.depends] deps += [create_depend(dbpkg, y, 'O') for y in repopkg.optdepends] + deps += [create_depend(dbpkg, y, 'M') for y in repopkg.makedepends] + deps += [create_depend(dbpkg, y, 'C') for y in repopkg.checkdepends] batched_bulk_create(Depend, deps) dbpkg.conflicts.all().delete() -- cgit v1.2.3-2-g168b From 241ff8fbd79f9f17cd326a34eb39096851f630ba Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 8 Aug 2012 22:07:06 -0500 Subject: Extract parse_version function from reporead logic Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 8b55b09a..af0a2dc0 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -34,6 +34,7 @@ from devel.utils import UserFinder from main.models import Arch, Package, PackageFile, Repo from main.utils import database_vendor from packages.models import Depend, Conflict, Provision, Replacement, Update +from packages.utils import parse_version logging.basicConfig( @@ -84,8 +85,6 @@ class RepoPackage(object): 'conflicts', 'provides', 'replaces', 'groups', 'license', 'files' ) - version_re = re.compile(r'^((\d+):)?(.+)-([^-]+)$') - def __init__(self, repo): self.repo = repo self.ver = None @@ -112,11 +111,7 @@ class RepoPackage(object): # do NOT prune these values at all setattr(self, k, v[0]) elif k == 'version': - match = self.version_re.match(v[0]) - self.ver = match.group(3) - self.rel = match.group(4) - if match.group(2): - self.epoch = int(match.group(2)) + self.ver, self.rel, self.epoch = parse_version(v[0]) elif k == 'builddate': try: builddate = datetime.utcfromtimestamp(int(v[0])) -- cgit v1.2.3-2-g168b From c76e5c768687394b8022883d01edf85dc3c30e7f Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Mon, 17 Sep 2012 21:42:48 -0500 Subject: Sort package list before inserting it into the database FS#30323. This will take some time to propagate to all existing packages, but all new and updated packages will start getting filelists in the right order. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index af0a2dc0..ac745092 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -303,7 +303,10 @@ def populate_files(dbpkg, repopkg, force=False): logger.info("adding %d files for package %s", len(repopkg.files), dbpkg.pkgname) pkg_files = [] - for f in repopkg.files: + # sort in normal alpha-order that pacman uses, rather than makepkg's + # default breadth-first, directory-first ordering + files = sorted(repopkg.files) + for f in files: if '/' in f: dirname, filename = f.rsplit('/', 1) dirname += '/' -- cgit v1.2.3-2-g168b From 7d5cfe45d52c4dbd2f431f0edcafc9936b740ab2 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 20 Sep 2012 09:32:42 -0500 Subject: Explicitly close the database connection in reporead This is the cause of these warnings showing up in the PostgreSQL log: LOG: unexpected EOF on client connection with an open transaction All management commands are guilty of this as they do not clean up and close the connection when they exit, unlike the standard web request cycle. Other commands should probably be updated as well, but for now, this is the biggest culprit. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 1 + 1 file changed, 1 insertion(+) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index ac745092..ce5c8cb7 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -584,6 +584,7 @@ def read_repo(primary_arch, repo_file, options): else: db_update(arch, repo, packages_arches[arch], force) logger.info('Finished database updates for %s.', repo_file) + connection.close() return 0 # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From 5228cb5f584f076e547e1d0af695c08975801d2f Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 12 Oct 2012 11:39:16 -0500 Subject: reporead: don't print full backtrace if unnecessary In the architecture agnostic case, this error is much more likely to happen, so printing it like an error message is deceiving. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index ce5c8cb7..a1e77b49 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -393,9 +393,12 @@ def db_update(archname, reponame, pkgs, force=False): populate_pkg(dbpkg, pkg, timestamp=now()) Update.objects.log_update(None, dbpkg) except IntegrityError: - logger.warning("Could not add package %s; " - "not fatal if another thread beat us to it.", - pkg.name, exc_info=True) + if architecture.agnostic: + logger.warning("Could not add package %s; " + "not fatal if another thread beat us to it.", + pkg.name) + else: + logger.exception("Could not add package %s", pkg.name) # packages in database and not in syncdb (remove from database) for pkgname in (dbset - syncset): -- cgit v1.2.3-2-g168b From 6dd4d54bb0adbbb0f8c2b1beaa92b7a58971cf88 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 16 Nov 2012 16:20:11 -0600 Subject: Use Python 2.7 dictionary comprehension syntax Rather than the old idiom of dict((k, v) for <> in <>). Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index a1e77b49..3d4e6375 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -377,7 +377,7 @@ def db_update(archname, reponame, pkgs, force=False): # This makes our inner loop where we find packages by name *way* more # efficient by not having to go to the database for each package to # SELECT them by name. - dbdict = dict((dbpkg.pkgname, dbpkg) for dbpkg in dbpkgs) + dbdict = {dbpkg.pkgname: dbpkg for dbpkg in dbpkgs} dbset = set(dbdict.keys()) syncset = set([pkg.name for pkg in pkgs]) @@ -446,7 +446,7 @@ def filesonly_update(archname, reponame, pkgs, force=False): """ logger.info('Updating files for %s (%s)', reponame, archname) dbpkgs = update_common(archname, reponame, pkgs, False) - dbdict = dict((dbpkg.pkgname, dbpkg) for dbpkg in dbpkgs) + dbdict = {dbpkg.pkgname: dbpkg for dbpkg in dbpkgs} dbset = set(dbdict.keys()) for pkg in (pkg for pkg in pkgs if pkg.name in dbset): -- cgit v1.2.3-2-g168b From 9e9157d0a8cbf9ea076231e438fb30f58bff8e29 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 16 Nov 2012 16:37:31 -0600 Subject: Use python set comprehension syntax supported in 2.7 Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 3d4e6375..981c4dce 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -380,7 +380,7 @@ def db_update(archname, reponame, pkgs, force=False): dbdict = {dbpkg.pkgname: dbpkg for dbpkg in dbpkgs} dbset = set(dbdict.keys()) - syncset = set([pkg.name for pkg in pkgs]) + syncset = {pkg.name for pkg in pkgs} in_sync_not_db = syncset - dbset logger.info("%d packages in sync not db", len(in_sync_not_db)) -- cgit v1.2.3-2-g168b From 04f23a040a839f4989fdc83afe0f5ad4f72224be Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Mon, 31 Dec 2012 10:17:22 -0600 Subject: Add 'created' field to packages model This will be used to eventually implement the UI side of FS#13441, but to do that, we first need the data. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index 981c4dce..e00e54c3 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -387,10 +387,12 @@ def db_update(archname, reponame, pkgs, force=False): # packages in syncdb and not in database (add to database) for pkg in (pkg for pkg in pkgs if pkg.name in in_sync_not_db): logger.info("Adding package %s", pkg.name) - dbpkg = Package(pkgname=pkg.name, arch=architecture, repo=repository) + timestamp = now() + dbpkg = Package(pkgname=pkg.name, arch=architecture, repo=repository, + created=timestamp) try: with transaction.commit_on_success(): - populate_pkg(dbpkg, pkg, timestamp=now()) + populate_pkg(dbpkg, pkg, timestamp=timestamp) Update.objects.log_update(None, dbpkg) except IntegrityError: if architecture.agnostic: -- cgit v1.2.3-2-g168b From af32c23768c7537f19e0613525579208b4f44eb4 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 15 Jan 2013 20:49:56 -0600 Subject: Handle connection and transaction more properly in reporead A few minor things are fixed here. One is PostgreSQL, and more specifically pgbouncer, don't like it when the connection is closed after psycopg2 has started an implicit transaction even for read-only queries. Ensure we call commit as our last database action in all cases. The other is related- Django in management commands doesn't ever call close on any database connection you may have been using, so PostgreSQL gets mad about this fact and logs a message saying such. Close the connection explicitly when we are done with it to play nice. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 1 + 1 file changed, 1 insertion(+) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index e00e54c3..ab0efeed 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -589,6 +589,7 @@ def read_repo(primary_arch, repo_file, options): else: db_update(arch, repo, packages_arches[arch], force) logger.info('Finished database updates for %s.', repo_file) + connection.commit() connection.close() return 0 -- cgit v1.2.3-2-g168b From 55179a4f9e8b80d515bae7032af8aefc33ae0192 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 16 Jan 2013 16:07:26 -0600 Subject: reporead: remove batched_bulk_create Now that Django 1.5 is out and realized SQLite3 only allows for 999 parameters per SQL call, we don't need to manually batch things up anymore and can let the underlying bulk_create code do it for us. This basically reverts commit 88ee61a39ac3. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 32 +++++--------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index ab0efeed..ccac55f2 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -32,7 +32,6 @@ from django.utils.timezone import now from devel.utils import UserFinder from main.models import Arch, Package, PackageFile, Repo -from main.utils import database_vendor from packages.models import Depend, Conflict, Provision, Replacement, Update from packages.utils import parse_version @@ -182,27 +181,6 @@ def create_related(model, package, rel_str, equals_only=False): return related -def batched_bulk_create(model, all_objects): - # for short lists, just bulk_create as we should be fine - if len(all_objects) < 20: - return model.objects.bulk_create(all_objects) - - if database_vendor(model, mode='write') == 'sqlite': - # 999 max variables in each SQL statement - incr = 999 // len(model._meta.fields) - else: - incr = 1000 - - def chunks(): - offset = 0 - while offset < len(all_objects): - yield all_objects[offset:offset + incr] - offset += incr - - for items in chunks(): - model.objects.bulk_create(items) - - def create_multivalued(dbpkg, repopkg, db_attr, repo_attr): '''Populate the simplest of multivalued attributes. These are those that only deal with a 'name' attribute, such as licenses, groups, etc. The input @@ -256,20 +234,20 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): deps += [create_depend(dbpkg, y, 'O') for y in repopkg.optdepends] deps += [create_depend(dbpkg, y, 'M') for y in repopkg.makedepends] deps += [create_depend(dbpkg, y, 'C') for y in repopkg.checkdepends] - batched_bulk_create(Depend, deps) + Depend.objects.bulk_create(deps) dbpkg.conflicts.all().delete() conflicts = [create_related(Conflict, dbpkg, y) for y in repopkg.conflicts] - batched_bulk_create(Conflict, conflicts) + Conflict.objects.bulk_create(conflicts) dbpkg.provides.all().delete() provides = [create_related(Provision, dbpkg, y, equals_only=True) for y in repopkg.provides] - batched_bulk_create(Provision, provides) + Provision.objects.bulk_create(provides) dbpkg.replaces.all().delete() replaces = [create_related(Replacement, dbpkg, y) for y in repopkg.replaces] - batched_bulk_create(Replacement, replaces) + Replacement.objects.bulk_create(replaces) create_multivalued(dbpkg, repopkg, 'groups', 'groups') create_multivalued(dbpkg, repopkg, 'licenses', 'license') @@ -319,7 +297,7 @@ def populate_files(dbpkg, repopkg, force=False): directory=dirname, filename=filename) pkg_files.append(pkgfile) - batched_bulk_create(PackageFile, pkg_files) + PackageFile.objects.bulk_create(pkg_files) dbpkg.files_last_update = now() dbpkg.save() -- cgit v1.2.3-2-g168b From 25aa164823a88f13761c750057833b6724808675 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 19 Mar 2013 00:10:52 -0500 Subject: Remove old-style build date parsing This was added in 2010 in commit e95c4563e32 as a short-term fix. The short-term is up. Signed-off-by: Dan McGee --- devel/management/commands/reporead.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'devel/management/commands/reporead.py') diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index ccac55f2..a0e77dc7 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -116,13 +116,9 @@ class RepoPackage(object): builddate = datetime.utcfromtimestamp(int(v[0])) self.builddate = builddate.replace(tzinfo=utc) except ValueError: - try: - self.builddate = datetime.strptime(v[0], - '%a %b %d %H:%M:%S %Y') - except ValueError: - logger.warning( - 'Package %s had unparsable build date %s', - self.name, v[0]) + logger.warning( + 'Package %s had unparsable build date %s', + self.name, v[0]) elif k == 'files': self.files = tuple(v) self.has_files = True -- cgit v1.2.3-2-g168b