summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dan McGee <dan@archlinux.org> 2013-01-18 20:52:20 -0600
committer: Dan McGee <dan@archlinux.org> 2013-01-18 20:52:20 -0600
commit: 375684ed91dd5499e7a4ea7787e45803e8467e16 (patch)
tree: c9d4d5473dda1063fce29b64e1e5880c31afe8f1
parent: 7313a7914dac9c86d48656964d310cff7fc2a0e1 (diff)
Use a set instead of list when gathering package differences
If we implement the __eq__ and __hash__ methods, we can use a set to gather package difference objects and make deduplication of objects a lot more efficient.

Signed-off-by: Dan McGee <dan@archlinux.org>
-rw-r--r-- packages/utils.py 25
1 file changed, 15 insertions, 10 deletions
diff --git a/packages/utils.py b/packages/utils.py
index 5f0c111e..a72404f4 100644
--- a/packages/utils.py
+++ b/packages/utils.py
@@ -1,6 +1,6 @@
from collections import defaultdict
from itertools import chain
-from operator import itemgetter
+from operator import attrgetter, itemgetter
import re
from django.core.serializers.json import DjangoJSONEncoder
@@ -108,10 +108,15 @@ class Difference(object):
css_classes.append(self.pkg_b.arch.name)
return ' '.join(css_classes)
- def __cmp__(self, other):
- if isinstance(other, Difference):
- return cmp(self.__dict__, other.__dict__)
- return False
+ def __key(self):
+ return (self.pkgname, hash(self.repo),
+ hash(self.pkg_a), hash(self.pkg_b))
+
+ def __eq__(self, other):
+ return self.__key() == other.__key()
+
+ def __hash__(self):
+ return hash(self.__key())
@cache_function(127)
@@ -146,8 +151,8 @@ SELECT p.id, q.id
to_fetch = {row[0] for row in results}
# fetch all of the necessary packages
pkgs = Package.objects.normal().in_bulk(to_fetch)
- # now build a list of tuples containing differences
- differences = []
+ # now build a set containing differences
+ differences = set()
for row in results:
pkg_a = pkgs.get(row[0])
pkg_b = pkgs.get(row[1])
@@ -160,11 +165,11 @@ SELECT p.id, q.id
name = pkg_a.pkgname if pkg_a else pkg_b.pkgname
repo = pkg_a.repo if pkg_a else pkg_b.repo
item = Difference(name, repo, pkg_b, pkg_a)
- if item not in differences:
- differences.append(item)
+ differences.add(item)
# now sort our list by repository, package name
- differences.sort(key=lambda a: (a.repo.name, a.pkgname))
+ key_func = attrgetter('repo.name', 'pkgname')
+ differences = sorted(differences, key=key_func)
return differences