summary refs log tree commit diff
path: root/devel/management
diff options
context:
space:
mode:
author Nicolás Reynolds <fauno@kiwwwi.com.ar> 2011-08-03 16:01:52 -0300
committer Nicolás Reynolds <fauno@kiwwwi.com.ar> 2011-08-03 16:01:52 -0300
commit a8b2fc84ba96c83ec1addf89ac04608fbf572705 (patch)
tree 6f54cbe43b8684908f2e80ba311272c06fd0fd08 /devel/management
parent 294bf173236610fc8c308f81d8617e7e0d0e4bff (diff)
parent b0bad20756549df5edf726771c8e6869caba6244 (diff)
Merge branch 'master' of git://projects.archlinux.org/archweb
Conflicts: templates/base.html
Diffstat (limited to 'devel/management')
-rw-r--r-- devel/management/commands/reporead.py 125
1 files changed, 76 insertions, 49 deletions
diff --git a/devel/management/commands/reporead.py b/devel/management/commands/reporead.py
index a8875c7e..baf7fee1 100644
--- a/devel/management/commands/reporead.py
+++ b/devel/management/commands/reporead.py
@@ -18,7 +18,8 @@ from django.contrib.auth.models import User
from django.db import transaction
from django.db.models import Q
-import codecs
+from collections import defaultdict
+import io
import os
import re
import sys
@@ -27,14 +28,6 @@ import logging
from datetime import datetime
from optparse import make_option
-# New in 2.6, but fast (C implementation) in 2.7. We will use it over codecs if
-# available. Eventually remove the codecs import completely.
-io = None
-try:
- import io
-except ImportError:
- pass
-
from main.models import Arch, Package, PackageDepend, PackageFile, Repo
from packages.models import Conflict, Provision, Replacement
@@ -74,18 +67,13 @@ class Command(BaseCommand):
elif v == 2:
logger.level = logging.DEBUG
- import signal, traceback
- handler = lambda sig, stack: traceback.print_stack(stack)
- signal.signal(signal.SIGQUIT, handler)
- signal.signal(signal.SIGUSR1, handler)
-
return read_repo(arch, filename, options)
class Pkg(object):
"""An interim 'container' object for holding Arch package data."""
bare = ( 'name', 'base', 'arch', 'desc', 'filename',
- 'md5sum', 'url', 'builddate', 'packager' )
+ 'md5sum', 'url', 'packager' )
number = ( 'csize', 'isize' )
collections = ( 'depends', 'optdepends', 'conflicts',
'provides', 'replaces', 'groups', 'license', 'files' )
@@ -101,8 +89,7 @@ class Pkg(object):
setattr(self, k, None)
for k in self.collections:
setattr(self, k, ())
- # So we can tell the diffence between a package with no files, and a DB
- # without files entries
+ self.files = None
self.has_files = False
def populate(self, values):
@@ -118,12 +105,22 @@ class Pkg(object):
self.rel = match.group(4)
if match.group(2):
self.epoch = int(match.group(2))
+ elif k == 'builddate':
+ try:
+ self.builddate = datetime.utcfromtimestamp(int(v[0]))
+ except ValueError:
+ try:
+ self.builddate = datetime.strptime(v[0],
+ '%a %b %d %H:%M:%S %Y')
+ except ValueError:
+ logger.warning('Package %s had unparsable build date %s',
+ self.name, v[0])
elif k == 'files':
- self.files = v
+ self.files = tuple(v)
self.has_files = True
else:
# anything left in collections
- setattr(self, k, v)
+ setattr(self, k, tuple(v))
@property
def full_version(self):
@@ -235,6 +232,8 @@ def create_multivalued(dbpkg, repopkg, db_attr, repo_attr):
collection.create(name=name)
def populate_pkg(dbpkg, repopkg, force=False, timestamp=None):
+ db_score = 1
+
if repopkg.base:
dbpkg.pkgbase = repopkg.base
else:
@@ -247,15 +246,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None):
dbpkg.filename = repopkg.filename
dbpkg.compressed_size = repopkg.csize
dbpkg.installed_size = repopkg.isize
- try:
- dbpkg.build_date = datetime.utcfromtimestamp(int(repopkg.builddate))
- except ValueError:
- try:
- dbpkg.build_date = datetime.strptime(repopkg.builddate,
- '%a %b %d %H:%M:%S %Y')
- except ValueError:
- logger.warning('Package %s had unparsable build date %s',
- repopkg.name, repopkg.builddate)
+ dbpkg.build_date = repopkg.builddate
dbpkg.packager_str = repopkg.packager
# attempt to find the corresponding django user for this string
dbpkg.packager = find_user(repopkg.packager)
@@ -265,7 +256,7 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None):
dbpkg.last_update = timestamp
dbpkg.save()
- populate_files(dbpkg, repopkg, force=force)
+ db_score += populate_files(dbpkg, repopkg, force=force)
dbpkg.packagedepend_set.all().delete()
for y in repopkg.depends:
@@ -286,6 +277,15 @@ def populate_pkg(dbpkg, repopkg, force=False, timestamp=None):
create_multivalued(dbpkg, repopkg, 'groups', 'groups')
create_multivalued(dbpkg, repopkg, 'licenses', 'license')
+ related_score = (len(repopkg.depends) + len(repopkg.optdepends)
+ + len(repopkg.conflicts) + len(repopkg.provides)
+ + len(repopkg.replaces) + len(repopkg.groups)
+ + len(repopkg.license))
+ if related_score:
+ db_score += (related_score / 20) + 1
+
+ return db_score
+
def populate_files(dbpkg, repopkg, force=False):
if not force:
@@ -294,11 +294,11 @@ def populate_files(dbpkg, repopkg, force=False):
logger.info("DB version (%s) didn't match repo version "
"(%s) for package %s, skipping file list addition",
dbpkg.full_version, repopkg.full_version, dbpkg.pkgname)
- return
+ return 0
if not dbpkg.files_last_update or not dbpkg.last_update:
pass
elif dbpkg.files_last_update > dbpkg.last_update:
- return
+ return 0
# only delete files if we are reading a DB that contains them
if repopkg.has_files:
dbpkg.packagefile_set.all().delete()
@@ -317,6 +317,28 @@ def populate_files(dbpkg, repopkg, force=False):
pkgfile.save(force_insert=True)
dbpkg.files_last_update = datetime.utcnow()
dbpkg.save()
+ return (len(repopkg.files) / 50) + 1
+ return 0
+
+
+class Batcher(object):
+ def __init__(self, threshold, start=0):
+ self.threshold = threshold
+ self.meter = start
+
+ def batch_commit(self, score):
+ """
+ Track updates to the database and perform a commit if the batch
+ becomes sufficiently large. "Large" is defined by waiting for the
+ sum of scores to exceed the arbitrary threshold value; once it is
+ hit a commit is issued.
+ """
+ self.meter += score
+ if self.meter > self.threshold:
+ logger.debug("Committing transaction, batch threshold hit")
+ transaction.commit()
+ self.meter = 0
+
@transaction.commit_on_success
def db_update(archname, reponame, pkgs, options):
@@ -369,19 +391,23 @@ def db_update(archname, reponame, pkgs, options):
elif dbpercent < 75.0:
logger.warning(msg)
+ batcher = Batcher(100)
+
if not filesonly:
# packages in syncdb and not in database (add to database)
for p in [x for x in pkgs if x.name in in_sync_not_db]:
logger.info("Adding package %s", p.name)
- pkg = Package(pkgname = p.name, arch = architecture, repo = repository)
- populate_pkg(pkg, p, timestamp=datetime.utcnow())
+ pkg = Package(pkgname=p.name, arch=architecture, repo=repository)
+ score = populate_pkg(pkg, p, timestamp=datetime.utcnow())
+ batcher.batch_commit(score)
# packages in database and not in syncdb (remove from database)
in_db_not_sync = dbset - syncset
for p in in_db_not_sync:
- logger.info("Removing package %s from database", p)
+ logger.info("Removing package %s", p)
dbp = dbdict[p]
dbp.delete()
+ batcher.batch_commit(1)
# packages in both database and in syncdb (update in database)
pkg_in_both = syncset & dbset
@@ -399,12 +425,15 @@ def db_update(archname, reponame, pkgs, options):
continue
else:
timestamp = datetime.utcnow()
+
if filesonly:
- logger.debug("Checking files for package %s in database", p.name)
- populate_files(dbp, p, force=force)
+ logger.debug("Checking files for package %s", p.name)
+ score = populate_files(dbp, p, force=force)
else:
- logger.info("Updating package %s in database", p.name)
- populate_pkg(dbp, p, force=force, timestamp=timestamp)
+ logger.info("Updating package %s", p.name)
+ score = populate_pkg(dbp, p, force=force, timestamp=timestamp)
+
+ batcher.batch_commit(score)
logger.info('Finished updating Arch: %s', archname)
@@ -454,23 +483,19 @@ def parse_repo(repopath):
repodb = tarfile.open(repopath, "r")
logger.debug("Starting package parsing")
dbfiles = ('desc', 'depends', 'files')
- pkgs = {}
+ newpkg = lambda: Pkg(reponame)
+ pkgs = defaultdict(newpkg)
for tarinfo in repodb.getmembers():
if tarinfo.isreg():
pkgid, fname = os.path.split(tarinfo.name)
if fname not in dbfiles:
continue
data_file = repodb.extractfile(tarinfo)
- if io is None:
- data_file = codecs.EncodedFile(data_file, 'utf-8')
- else:
- data_file = io.TextIOWrapper(io.BytesIO(data_file.read()),
- encoding='utf=8')
+ data_file = io.TextIOWrapper(io.BytesIO(data_file.read()),
+ encoding='utf=8')
try:
- data = parse_info(data_file)
- p = pkgs.setdefault(pkgid, Pkg(reponame))
- p.populate(data)
- except UnicodeDecodeError, e:
+ pkgs[pkgid].populate(parse_info(data_file))
+ except UnicodeDecodeError:
logger.warn("Could not correctly decode %s, skipping file",
tarinfo.name)
data_file.close()
@@ -503,7 +528,9 @@ def read_repo(primary_arch, repo_file, options):
else:
# we don't include mis-arched packages
logger.warning("Package %s arch = %s",
- package.name,package.arch)
+ package.name, package.arch)
+ del packages
+
logger.info('Starting database updates.')
for arch in sorted(packages_arches.keys()):
db_update(arch, repo, packages_arches[arch], options)