diff options
author | Dan McGee <dan@archlinux.org> | 2012-07-08 21:24:48 -0500 |
---|---|---|
committer | Dan McGee <dan@archlinux.org> | 2012-07-08 21:24:48 -0500 |
commit | 88ee61a39ac3690267f2b7903f3646972e8f055d (patch) | |
tree | fa3eca2b2b7c308ee2c1083f550e3cb7874d3a9a /devel/management/commands | |
parent | ef8fb7c7f242fc0f081f86d283e2fac22504a6b5 (diff) |
Work around bulk_create limitations in sqlite3 in reporead
Given SQLite's limit of 999 variables per SQL statement, we can easily hit it when
updating a package with thousands of files or a few hundred depends, so large
bulk_create calls are now split into batches.
Signed-off-by: Dan McGee <dan@archlinux.org>
Diffstat (limited to 'devel/management/commands')
-rw-r--r-- | devel/management/commands/reporead.py | 34 |
1 files changed, 28 insertions, 6 deletions
diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py index e50686b1..2d9b68b2 100644 --- a/devel/management/commands/reporead.py +++ b/devel/management/commands/reporead.py @@ -31,7 +31,7 @@ from django.db.utils import IntegrityError from devel.utils import UserFinder from main.models import Arch, Package, PackageFile, Repo -from main.utils import utc_now +from main.utils import utc_now, database_vendor from packages.models import Depend, Conflict, Provision, Replacement, Update @@ -184,6 +184,28 @@ def create_related(model, package, rel_str, equals_only=False): return None return related + +def batched_bulk_create(model, all_objects): + # for short lists, just bulk_create as we should be fine + if len(all_objects) < 20: + return model.objects.bulk_create(all_objects) + + if database_vendor(model, mode='write') == 'sqlite': + # 999 max variables in each SQL statement + incr = 999 // len(model._meta.fields) + else: + incr = 1000 + + def chunks(): + offset = 0 + while offset < len(all_objects): + yield all_objects[offset:offset + incr] + offset += incr + + for items in chunks(): + model.objects.bulk_create(items) + + def create_multivalued(dbpkg, repopkg, db_attr, repo_attr): '''Populate the simplest of multivalued attributes. These are those that only deal with a 'name' attribute, such as licenses, groups, etc. 
The input @@ -235,20 +257,20 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None): dbpkg.depends.all().delete() deps = [create_depend(dbpkg, y) for y in repopkg.depends] deps += [create_depend(dbpkg, y, True) for y in repopkg.optdepends] - Depend.objects.bulk_create(deps) + batched_bulk_create(Depend, deps) dbpkg.conflicts.all().delete() conflicts = [create_related(Conflict, dbpkg, y) for y in repopkg.conflicts] - Conflict.objects.bulk_create(conflicts) + batched_bulk_create(Conflict, conflicts) dbpkg.provides.all().delete() provides = [create_related(Provision, dbpkg, y, equals_only=True) for y in repopkg.provides] - Provision.objects.bulk_create(provides) + batched_bulk_create(Provision, provides) dbpkg.replaces.all().delete() replaces = [create_related(Replacement, dbpkg, y) for y in repopkg.replaces] - Replacement.objects.bulk_create(replaces) + batched_bulk_create(Replacement, replaces) create_multivalued(dbpkg, repopkg, 'groups', 'groups') create_multivalued(dbpkg, repopkg, 'licenses', 'license') @@ -295,7 +317,7 @@ def populate_files(dbpkg, repopkg, force=False): directory=dirname, filename=filename) pkg_files.append(pkgfile) - PackageFile.objects.bulk_create(pkg_files) + batched_bulk_create(PackageFile, pkg_files) dbpkg.files_last_update = utc_now() dbpkg.save() |