From badc535aeb1d310a9b8aa59aade07045e6eae653 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 18 Apr 2012 15:05:43 -0500 Subject: Ensure order_by default value is cleared when using distinct() Otherwise the queryset returns nonsensical results. I find the design of this less than obvious but so be it; we can ensure the results work regardless of a default ordering on the model. Signed-off-by: Dan McGee --- packages/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index a3c13b17..8d00bd68 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -218,7 +218,8 @@ def attach_maintainers(packages): packages = list(packages) pkgbases = set(p.pkgbase for p in packages) rels = PackageRelation.objects.filter(type=PackageRelation.MAINTAINER, - pkgbase__in=pkgbases).values_list('pkgbase', 'user_id').distinct() + pkgbase__in=pkgbases).values_list( + 'pkgbase', 'user_id').order_by().distinct() # get all the user objects we will need user_ids = set(rel[1] for rel in rels) -- cgit v1.2.3-2-g168b From 72a92102df4999dbcc370064707c9026d51c4fe7 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 18 May 2012 21:29:03 -0500 Subject: Switch to usage of new Depend object Signed-off-by: Dan McGee --- packages/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index 8d00bd68..82313472 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -7,10 +7,10 @@ from django.db import connection from django.db.models import Count, Max, F from django.contrib.auth.models import User -from main.models import Package, PackageDepend, PackageFile, Arch, Repo +from main.models import Package, PackageFile, Arch, Repo from main.utils import cache_function, groupby_preserve_order, PackageStandin from .models import (PackageGroup, PackageRelation, - License, Conflict, Provision, Replacement, + License, Depend, Conflict, Provision, Replacement, SignoffSpecification, Signoff, DEFAULT_SIGNOFF_SPEC) @cache_function(127) @@ -451,7 +451,7 @@ class PackageJSONEncoder(DjangoJSONEncoder): return obj.name.lower() if isinstance(obj, (PackageGroup, License)): return obj.name - if isinstance(obj, (Conflict, Provision, Replacement, PackageDepend)): + if isinstance(obj, (Depend, Conflict, Provision, Replacement)): return unicode(obj) elif isinstance(obj, User): return obj.username -- cgit v1.2.3-2-g168b From b95b0cd4197d70831754a7e81b40388c37ab1a3d Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 8 Jul 2012 20:51:23 -0500 Subject: Use a set instead of list when gathering package IDs to fetch If we have duplicates in this list, it makes no sense to include them in the list we send to the database. Signed-off-by: Dan McGee --- packages/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index 82313472..b86b6eba 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -123,7 +123,7 @@ SELECT p.id, q.id cursor.execute(sql, [arch_a.id, arch_b.id]) results = cursor.fetchall() # column A will always have a value, column B might be NULL - to_fetch = [row[0] for row in results] + to_fetch = set(row[0] for row in results) # fetch all of the necessary packages pkgs = Package.objects.normal().in_bulk(to_fetch) # now build a list of tuples containing differences -- cgit v1.2.3-2-g168b From 3c906888e2ba9e55cef00dfc61667fb383c9754d Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 8 Jul 2012 20:44:07 -0500 Subject: Get multilib package differences query working on sqlite3 Thank you database engines for all implementing such simple operations as substring() and length() in different ways. Signed-off-by: Dan McGee --- packages/utils.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index b86b6eba..6d54d71a 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -8,7 +8,8 @@ from django.db.models import Count, Max, F from django.contrib.auth.models import User from main.models import Package, PackageFile, Arch, Repo -from main.utils import cache_function, groupby_preserve_order, PackageStandin +from main.utils import (cache_function, database_vendor, + groupby_preserve_order, PackageStandin) from .models import (PackageGroup, PackageRelation, License, Depend, Conflict, Provision, Replacement, SignoffSpecification, Signoff, DEFAULT_SIGNOFF_SPEC) @@ -150,12 +151,18 @@ SELECT p.id, q.id def multilib_differences(): # Query for checking multilib out of date-ness - sql = """ -SELECT ml.id, reg.id - FROM packages ml - JOIN packages reg - ON ( - reg.pkgname = ( + if database_vendor(Package) == 'sqlite': + pkgname_sql = """ + CASE WHEN ml.pkgname LIKE %s + THEN SUBSTR(ml.pkgname, 7) + WHEN ml.pkgname LIKE %s + THEN SUBSTR(ml.pkgname, 1, LENGTH(ml.pkgname) - 9) + ELSE + ml.pkgname + END + """ + else: + pkgname_sql = """ CASE WHEN ml.pkgname LIKE %s THEN SUBSTRING(ml.pkgname, 7) WHEN ml.pkgname LIKE %s @@ -163,7 +170,13 @@ SELECT ml.id, reg.id ELSE ml.pkgname END - ) + """ + sql = """ +SELECT ml.id, reg.id + FROM packages ml + JOIN packages reg + ON ( + reg.pkgname = (""" + pkgname_sql + """) AND reg.pkgver != ml.pkgver ) JOIN repos r ON reg.repo_id = r.id @@ -172,7 +185,7 @@ SELECT ml.id, reg.id AND r.staging = %s AND reg.arch_id = %s ORDER BY ml.last_update -""" + """ multilib = Repo.objects.get(name__iexact='multilib') i686 = Arch.objects.get(name='i686') params = ['lib32-%', '%-multilib', multilib.id, False, False, i686.id] -- cgit v1.2.3-2-g168b From 241ff8fbd79f9f17cd326a34eb39096851f630ba Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 8 Aug 2012 22:07:06 -0500 Subject: Extract parse_version function from reporead logic Signed-off-by: Dan McGee --- packages/utils.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index 6d54d71a..d4b4e611 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -1,6 +1,7 @@ from collections import defaultdict from itertools import chain from operator import itemgetter +import re from django.core.serializers.json import DjangoJSONEncoder from django.db import connection @@ -14,6 +15,23 @@ from .models import (PackageGroup, PackageRelation, License, Depend, Conflict, Provision, Replacement, SignoffSpecification, Signoff, DEFAULT_SIGNOFF_SPEC) + +VERSION_RE = re.compile(r'^((\d+):)?(.+)-([^-]+)$') + + +def parse_version(version): + match = VERSION_RE.match(version) + if not match: + return None, None, 0 + ver = match.group(3) + rel = match.group(4) + if match.group(2): + epoch = int(match.group(2)) + else: + epoch = 0 + return ver, rel, epoch + + @cache_function(127) def get_group_info(include_arches=None): raw_groups = PackageGroup.objects.values_list( -- cgit v1.2.3-2-g168b From ad05f3eb2c8511c63dbdc9378bf3561ab949e940 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 8 Aug 2012 22:07:38 -0500 Subject: PEP8 cleanups in package utils Signed-off-by: Dan McGee --- packages/utils.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index d4b4e611..d95c015f 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -36,7 +36,7 @@ def parse_version(version): def get_group_info(include_arches=None): raw_groups = PackageGroup.objects.values_list( 'name', 'pkg__arch__name').order_by('name').annotate( - cnt=Count('pkg'), last_update=Max('pkg__last_update')) + cnt=Count('pkg'), last_update=Max('pkg__last_update')) # now for post_processing. we need to seperate things out and add # the count in for 'any' to all of the other architectures. group_mapping = {} @@ -71,6 +71,7 @@ def get_group_info(include_arches=None): groups.extend(val.itervalues()) return sorted(groups, key=itemgetter('name', 'arch')) + def get_split_packages_info(): '''Return info on split packages that do not have an actual package name matching the split pkgbase.''' @@ -276,6 +277,7 @@ def approved_by_signoffs(signoffs, spec): return good_signoffs >= spec.required return False + class PackageSignoffGroup(object): '''Encompasses all packages in testing with the same pkgbase.''' def __init__(self, packages): @@ -375,6 +377,7 @@ SELECT DISTINCT s.id AND p.repo_id IN (%s) """ + def get_current_signoffs(repos): '''Returns a mapping of pkgbase -> signoff objects for the given repos.''' cursor = connection.cursor() @@ -389,6 +392,7 @@ def get_current_signoffs(repos): signoffs = Signoff.objects.select_related('user').in_bulk(to_fetch) return signoffs.values() + def get_current_specifications(repos): '''Returns a mapping of pkgbase -> signoff specification objects for the given repos.''' @@ -401,6 +405,7 @@ def get_current_specifications(repos): to_fetch = [row[0] for row in results] return SignoffSpecification.objects.in_bulk(to_fetch).values() + def get_target_repo_map(repos): sql = """ SELECT DISTINCT p1.pkgbase, r.name @@ -421,6 +426,7 @@ SELECT DISTINCT p1.pkgbase, r.name cursor.execute(sql, params) return dict(cursor.fetchall()) + def get_signoff_groups(repos=None, user=None): if repos is None: repos = Repo.objects.filter(testing=True) @@ -458,12 +464,12 @@ def get_signoff_groups(repos=None, user=None): class PackageJSONEncoder(DjangoJSONEncoder): - pkg_attributes = [ 'pkgname', 'pkgbase', 'repo', 'arch', 'pkgver', + pkg_attributes = ['pkgname', 'pkgbase', 'repo', 'arch', 'pkgver', 'pkgrel', 'epoch', 'pkgdesc', 'url', 'filename', 'compressed_size', 'installed_size', 'build_date', 'last_update', 'flag_date', - 'maintainers', 'packager' ] - pkg_list_attributes = [ 'groups', 'licenses', 'conflicts', - 'provides', 'replaces', 'depends' ] + 'maintainers', 'packager'] + pkg_list_attributes = ['groups', 'licenses', 'conflicts', + 'provides', 'replaces', 'depends'] def default(self, obj): if hasattr(obj, '__iter__'): @@ -488,5 +494,4 @@ class PackageJSONEncoder(DjangoJSONEncoder): return obj.username return super(PackageJSONEncoder, self).default(obj) - # vim: set ts=4 sw=4 et: -- cgit v1.2.3-2-g168b From a071d800c6a26d3efcdc0d32fe1adb1cde7e6f31 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Wed, 15 Aug 2012 08:22:01 -0500 Subject: Fix signoffs SQL query Although the old query returned the same results, the repos IN clause should really be a part of the WHERE, not the JOIN condition. Signed-off-by: Dan McGee --- packages/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index d95c015f..ee1b56b3 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -363,6 +363,7 @@ class PackageSignoffGroup(object): return u'%s-%s (%s): %d' % ( self.pkgbase, self.version, self.arch, len(self.signoffs)) + _SQL_SPEC_OR_SIGNOFF = """ SELECT DISTINCT s.id FROM %s s @@ -374,7 +375,7 @@ SELECT DISTINCT s.id AND s.arch_id = p.arch_id AND s.repo_id = p.repo_id ) - AND p.repo_id IN (%s) + WHERE p.repo_id IN (%s) """ -- cgit v1.2.3-2-g168b From a71aa2e354599950f4bd464f0f19215f1c581141 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 12 Oct 2012 11:34:49 -0500 Subject: Make wrong permissions query more efficient This removes the subplan and per-row query in favor of a LEFT JOIN where we look for non-matching rows. Tested in sqlite3 and PostgreSQL. Signed-off-by: Dan McGee --- packages/utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index ee1b56b3..c29e2297 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -228,12 +228,13 @@ SELECT DISTINCT id FROM packages p JOIN packages_packagerelation pr ON p.pkgbase = pr.pkgbase WHERE pr.type = %s - ) pkgs - WHERE pkgs.repo_id NOT IN ( - SELECT repo_id FROM user_profiles_allowed_repos ar + ) mp + LEFT JOIN ( + SELECT user_id, repo_id FROM user_profiles_allowed_repos ar INNER JOIN user_profiles up ON ar.userprofile_id = up.id - WHERE up.user_id = pkgs.user_id - ) + ) ur + ON mp.user_id = ur.user_id AND mp.repo_id = ur.repo_id + WHERE ur.user_id IS NULL; """ cursor = connection.cursor() cursor.execute(sql, [PackageRelation.MAINTAINER]) -- cgit v1.2.3-2-g168b From 0b3aa29cb63c6ca07f066a4a68fa3df9b92f6216 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 14 Oct 2012 15:42:15 -0500 Subject: Refactor signoff-grabbing queries Make them a bit more efficient by adding an explicit condition on both the packages and signoff table for the repo ID, and move the common code into a shared function both can use. Signed-off-by: Dan McGee --- packages/utils.py | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index c29e2297..051fed8e 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -365,7 +365,8 @@ class PackageSignoffGroup(object): self.pkgbase, self.version, self.arch, len(self.signoffs)) -_SQL_SPEC_OR_SIGNOFF = """ +def signoffs_id_query(model, repos): + sql = """ SELECT DISTINCT s.id FROM %s s JOIN packages p ON ( @@ -377,34 +378,29 @@ SELECT DISTINCT s.id AND s.repo_id = p.repo_id ) WHERE p.repo_id IN (%s) -""" - - -def get_current_signoffs(repos): - '''Returns a mapping of pkgbase -> signoff objects for the given repos.''' + AND s.repo_id IN (%s) + """ cursor = connection.cursor() # query pre-process- fill in table name and placeholders for IN - sql = _SQL_SPEC_OR_SIGNOFF % ('packages_signoff', - ','.join(['%s' for r in repos])) - cursor.execute(sql, [r.pk for r in repos]) + repo_sql = ','.join(['%s' for r in repos]) + sql = sql % (model._meta.db_table, repo_sql, repo_sql) + repo_ids = [r.pk for r in repos] + # repo_ids are needed twice, so double the array + cursor.execute(sql, repo_ids * 2) results = cursor.fetchall() - # fetch all of the returned signoffs by ID - to_fetch = [row[0] for row in results] - signoffs = Signoff.objects.select_related('user').in_bulk(to_fetch) - return signoffs.values() + return [row[0] for row in results] -def get_current_specifications(repos): - '''Returns a mapping of pkgbase -> signoff specification objects for the - given repos.''' - cursor = connection.cursor() - sql = _SQL_SPEC_OR_SIGNOFF % ('packages_signoffspecification', - ','.join(['%s' for r in repos])) - cursor.execute(sql, [r.pk for r in repos]) +def get_current_signoffs(repos): + '''Returns a list of signoff objects for the given repos.''' + to_fetch = signoffs_id_query(Signoff, repos) + return Signoff.objects.select_related('user').in_bulk(to_fetch).values() - results = cursor.fetchall() - to_fetch = [row[0] for row in results] + +def get_current_specifications(repos): + '''Returns a list of signoff specification objects for the given repos.''' + to_fetch = signoffs_id_query(SignoffSpecification, repos) return SignoffSpecification.objects.in_bulk(to_fetch).values() -- cgit v1.2.3-2-g168b From 6dd4d54bb0adbbb0f8c2b1beaa92b7a58971cf88 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 16 Nov 2012 16:20:11 -0600 Subject: Use Python 2.7 dictionary comprehension syntax Rather than the old idiom of dict((k, v) for <> in <>). Signed-off-by: Dan McGee --- packages/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index 051fed8e..199e141d 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -385,7 +385,7 @@ SELECT DISTINCT s.id repo_sql = ','.join(['%s' for r in repos]) sql = sql % (model._meta.db_table, repo_sql, repo_sql) repo_ids = [r.pk for r in repos] - # repo_ids are needed twice, so double the array + # repo_ids are needed twice, so double the array cursor.execute(sql, repo_ids * 2) results = cursor.fetchall() @@ -474,8 +474,7 @@ class PackageJSONEncoder(DjangoJSONEncoder): # mainly for queryset serialization return list(obj) if isinstance(obj, Package): - data = dict((attr, getattr(obj, attr)) - for attr in self.pkg_attributes) + data = {attr: getattr(obj, attr) for attr in self.pkg_attributes} for attr in self.pkg_list_attributes: data[attr] = getattr(obj, attr).all() return data -- cgit v1.2.3-2-g168b From 9e9157d0a8cbf9ea076231e438fb30f58bff8e29 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 16 Nov 2012 16:37:31 -0600 Subject: Use python set comprehension syntax supported in 2.7 Signed-off-by: Dan McGee --- packages/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index 199e141d..5adc8637 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -79,8 +79,8 @@ def get_split_packages_info(): split_pkgs = Package.objects.exclude(pkgname=F('pkgbase')).exclude( pkgbase__in=pkgnames).values('pkgbase', 'repo', 'arch').annotate( last_update=Max('last_update')) - all_arches = Arch.objects.in_bulk(set(s['arch'] for s in split_pkgs)) - all_repos = Repo.objects.in_bulk(set(s['repo'] for s in split_pkgs)) + all_arches = Arch.objects.in_bulk({s['arch'] for s in split_pkgs}) + all_repos = Repo.objects.in_bulk({s['repo'] for s in split_pkgs}) for split in split_pkgs: split['arch'] = all_arches[split['arch']] split['repo'] = all_repos[split['repo']] @@ -143,7 +143,7 @@ SELECT p.id, q.id cursor.execute(sql, [arch_a.id, arch_b.id]) results = cursor.fetchall() # column A will always have a value, column B might be NULL - to_fetch = set(row[0] for row in results) + to_fetch = {row[0] for row in results} # fetch all of the necessary packages pkgs = Package.objects.normal().in_bulk(to_fetch) # now build a list of tuples containing differences @@ -249,13 +249,13 @@ def attach_maintainers(packages): the maintainers and attach them to the packages to prevent N+1 query cascading.''' packages = list(packages) - pkgbases = set(p.pkgbase for p in packages) + pkgbases = {p.pkgbase for p in packages} rels = PackageRelation.objects.filter(type=PackageRelation.MAINTAINER, pkgbase__in=pkgbases).values_list( 'pkgbase', 'user_id').order_by().distinct() # get all the user objects we will need - user_ids = set(rel[1] for rel in rels) + user_ids = {rel[1] for rel in rels} users = User.objects.in_bulk(user_ids) # now build a pkgbase -> [maintainers...] map -- cgit v1.2.3-2-g168b From b801818eeed1068595cea863e9ae427f3931f925 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 27 Dec 2012 23:25:51 -0600 Subject: Make attach_maintainers null-safe Signed-off-by: Dan McGee --- packages/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index 5adc8637..5f0c111e 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -249,7 +249,7 @@ def attach_maintainers(packages): the maintainers and attach them to the packages to prevent N+1 query cascading.''' packages = list(packages) - pkgbases = {p.pkgbase for p in packages} + pkgbases = {p.pkgbase for p in packages if p is not None} rels = PackageRelation.objects.filter(type=PackageRelation.MAINTAINER, pkgbase__in=pkgbases).values_list( 'pkgbase', 'user_id').order_by().distinct() @@ -266,6 +266,8 @@ def attach_maintainers(packages): annotated = [] # and finally, attach the maintainer lists on the original packages for package in packages: + if package is None: + continue package.maintainers = maintainers[package.pkgbase] annotated.append(package) -- cgit v1.2.3-2-g168b From 375684ed91dd5499e7a4ea7787e45803e8467e16 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Fri, 18 Jan 2013 20:52:20 -0600 Subject: Use a set instead of list when gathering package differences If we implement the __eq__ and __hash__ methods, we can use a set to gather package difference objects and make deduplication of objects a lot more efficient. Signed-off-by: Dan McGee --- packages/utils.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index 5f0c111e..a72404f4 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -1,6 +1,6 @@ from collections import defaultdict from itertools import chain -from operator import itemgetter +from operator import attrgetter, itemgetter import re from django.core.serializers.json import DjangoJSONEncoder @@ -108,10 +108,15 @@ class Difference(object): css_classes.append(self.pkg_b.arch.name) return ' '.join(css_classes) - def __cmp__(self, other): - if isinstance(other, Difference): - return cmp(self.__dict__, other.__dict__) - return False + def __key(self): + return (self.pkgname, hash(self.repo), + hash(self.pkg_a), hash(self.pkg_b)) + + def __eq__(self, other): + return self.__key() == other.__key() + + def __hash__(self): + return hash(self.__key()) @cache_function(127) @@ -146,8 +151,8 @@ SELECT p.id, q.id to_fetch = {row[0] for row in results} # fetch all of the necessary packages pkgs = Package.objects.normal().in_bulk(to_fetch) - # now build a list of tuples containing differences - differences = [] + # now build a set containing differences + differences = set() for row in results: pkg_a = pkgs.get(row[0]) pkg_b = pkgs.get(row[1]) @@ -160,11 +165,11 @@ SELECT p.id, q.id name = pkg_a.pkgname if pkg_a else pkg_b.pkgname repo = pkg_a.repo if pkg_a else pkg_b.repo item = Difference(name, repo, pkg_b, pkg_a) - if item not in differences: - differences.append(item) + differences.add(item) # now sort our list by repository, package name - differences.sort(key=lambda a: (a.repo.name, a.pkgname)) + key_func = attrgetter('repo.name', 'pkgname') + differences = sorted(differences, key=key_func) return differences -- cgit v1.2.3-2-g168b From 2c958511c41f53fb7de49ed4662eec966e0b76a5 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Tue, 22 Jan 2013 16:48:49 -0600 Subject: Use a subquery rather than two queries in attach_maintainers Now that we are using a database that doesn't stink, it makes more sense to do all of the stuff we need to do down at the database level. This helps a lot when 500+ packages are in play at a given time, such as some of our larger rebuild todo lists. Signed-off-by: Dan McGee --- packages/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index a72404f4..49aeb8ce 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -6,6 +6,7 @@ import re from django.core.serializers.json import DjangoJSONEncoder from django.db import connection from django.db.models import Count, Max, F +from django.db.models.query import QuerySet from django.contrib.auth.models import User from main.models import Package, PackageFile, Arch, Repo @@ -253,8 +254,11 @@ def attach_maintainers(packages): '''Given a queryset or something resembling it of package objects, find all the maintainers and attach them to the packages to prevent N+1 query cascading.''' - packages = list(packages) - pkgbases = {p.pkgbase for p in packages if p is not None} + if isinstance(packages, QuerySet): + pkgbases = packages.values('pkgbase') + else: + packages = list(packages) + pkgbases = {p.pkgbase for p in packages if p is not None} rels = PackageRelation.objects.filter(type=PackageRelation.MAINTAINER, pkgbase__in=pkgbases).values_list( 'pkgbase', 'user_id').order_by().distinct() -- cgit v1.2.3-2-g168b From 5bc85244281efc916132c86046018d0ebe70b5e9 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 10 Feb 2013 12:45:24 -0600 Subject: Fix split packages sitemap We had a ton of duplicate entries included due to the query implicitly including a 'GROUP BY' clause on the default sorting by pkgname. Fix it and cut the sitemap down to the correct size without duplicate entries. Signed-off-by: Dan McGee --- packages/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index 49aeb8ce..ef6311eb 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -79,7 +79,7 @@ def get_split_packages_info(): pkgnames = Package.objects.values('pkgname') split_pkgs = Package.objects.exclude(pkgname=F('pkgbase')).exclude( pkgbase__in=pkgnames).values('pkgbase', 'repo', 'arch').annotate( - last_update=Max('last_update')) + last_update=Max('last_update')).order_by().distinct() all_arches = Arch.objects.in_bulk({s['arch'] for s in split_pkgs}) all_repos = Repo.objects.in_bulk({s['repo'] for s in split_pkgs}) for split in split_pkgs: -- cgit v1.2.3-2-g168b From 1f2a6384f332e75e9befc13b5a4b7b2906db6c50 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Mon, 11 Mar 2013 21:25:27 -0500 Subject: Remove function caching in packages/utils We don't see these called enough to make caching the data worth it. Signed-off-by: Dan McGee --- packages/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'packages/utils.py') diff --git a/packages/utils.py b/packages/utils.py index ef6311eb..a4217fbd 100644 --- a/packages/utils.py +++ b/packages/utils.py @@ -10,7 +10,7 @@ from django.db.models.query import QuerySet from django.contrib.auth.models import User from main.models import Package, PackageFile, Arch, Repo -from main.utils import (cache_function, database_vendor, +from main.utils import (database_vendor, groupby_preserve_order, PackageStandin) from .models import (PackageGroup, PackageRelation, License, Depend, Conflict, Provision, Replacement, @@ -33,7 +33,6 @@ def parse_version(version): return ver, rel, epoch -@cache_function(127) def get_group_info(include_arches=None): raw_groups = PackageGroup.objects.values_list( 'name', 'pkg__arch__name').order_by('name').annotate( @@ -120,7 +119,6 @@ class Difference(object): return hash(self.__key()) -@cache_function(127) def get_differences_info(arch_a, arch_b): # This is a monster. Join packages against itself, looking for packages in # our non-'any' architectures only, and not having a corresponding package -- cgit v1.2.3-2-g168b