diff options
author | Dan McGee <dan@archlinux.org> | 2013-01-18 20:52:20 -0600 |
---|---|---|
committer | Dan McGee <dan@archlinux.org> | 2013-01-18 20:52:20 -0600 |
commit | 375684ed91dd5499e7a4ea7787e45803e8467e16 (patch) | |
tree | c9d4d5473dda1063fce29b64e1e5880c31afe8f1 | |
parent | 7313a7914dac9c86d48656964d310cff7fc2a0e1 (diff) |
Use a set instead of list when gathering package differences
If we implement the __eq__ and __hash__ methods, we can use a set to
gather package difference objects and make deduplication of objects a
lot more efficient.
Signed-off-by: Dan McGee <dan@archlinux.org>
-rw-r--r-- | packages/utils.py | 25 |
1 file changed, 15 insertions, 10 deletions
diff --git a/packages/utils.py b/packages/utils.py
index 5f0c111e..a72404f4 100644
--- a/packages/utils.py
+++ b/packages/utils.py
@@ -1,6 +1,6 @@
 from collections import defaultdict
 from itertools import chain
-from operator import itemgetter
+from operator import attrgetter, itemgetter
 import re
 
 from django.core.serializers.json import DjangoJSONEncoder
@@ -108,10 +108,15 @@ class Difference(object):
             css_classes.append(self.pkg_b.arch.name)
         return ' '.join(css_classes)
 
-    def __cmp__(self, other):
-        if isinstance(other, Difference):
-            return cmp(self.__dict__, other.__dict__)
-        return False
+    def __key(self):
+        return (self.pkgname, hash(self.repo),
+                hash(self.pkg_a), hash(self.pkg_b))
+
+    def __eq__(self, other):
+        return self.__key() == other.__key()
+
+    def __hash__(self):
+        return hash(self.__key())
 
 
 @cache_function(127)
@@ -146,8 +151,8 @@ SELECT p.id, q.id
     to_fetch = {row[0] for row in results}
     # fetch all of the necessary packages
     pkgs = Package.objects.normal().in_bulk(to_fetch)
-    # now build a list of tuples containing differences
-    differences = []
+    # now build a set containing differences
+    differences = set()
     for row in results:
         pkg_a = pkgs.get(row[0])
         pkg_b = pkgs.get(row[1])
@@ -160,11 +165,11 @@ SELECT p.id, q.id
             name = pkg_a.pkgname if pkg_a else pkg_b.pkgname
             repo = pkg_a.repo if pkg_a else pkg_b.repo
             item = Difference(name, repo, pkg_b, pkg_a)
-        if item not in differences:
-            differences.append(item)
+        differences.add(item)
 
     # now sort our list by repository, package name
-    differences.sort(key=lambda a: (a.repo.name, a.pkgname))
+    key_func = attrgetter('repo.name', 'pkgname')
+    differences = sorted(differences, key=key_func)
     return differences
 
 