From 1ff2e37e049004852681794537417a1947bf6f18 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 19 Oct 2014 14:19:05 -0500 Subject: Simplify last modified and etags processing for feeds We had this elaborate system set up with caching and invalidation, which is overkill since we cache the result of the view anyway. Just hit the database when needed to find the last change to the respective model class and be done with it. Signed-off-by: Dan McGee --- feeds.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'feeds.py') diff --git a/feeds.py b/feeds.py index feb8a84a..d1836178 100644 --- a/feeds.py +++ b/feeds.py @@ -4,11 +4,11 @@ from pytz import utc from django.contrib.sites.models import Site from django.contrib.syndication.views import Feed +from django.db import connection from django.db.models import Q from django.utils.feedgenerator import Rss201rev2Feed from django.views.decorators.http import condition -from main.utils import retrieve_latest from main.models import Arch, Repo, Package from news.models import News from releng.models import Release @@ -64,13 +64,15 @@ class GuidNotPermalinkFeed(Rss201rev2Feed): def package_etag(request, *args, **kwargs): - latest = retrieve_latest(Package) + latest = package_last_modified(request) if latest: return hashlib.md5(str(kwargs) + str(latest)).hexdigest() return None def package_last_modified(request, *args, **kwargs): - return retrieve_latest(Package) + cursor = connection.cursor() + cursor.execute("SELECT MAX(last_update) FROM packages") + return cursor.fetchone()[0] class PackageFeed(Feed): @@ -148,13 +150,15 @@ class PackageFeed(Feed): def news_etag(request, *args, **kwargs): - latest = retrieve_latest(News, 'last_modified') + latest = news_last_modified(request) if latest: return hashlib.md5(str(latest)).hexdigest() return None def news_last_modified(request, *args, **kwargs): - return retrieve_latest(News, 'last_modified') + cursor = connection.cursor() + cursor.execute("SELECT MAX(last_modified) FROM news") + return cursor.fetchone()[0] class NewsFeed(Feed): -- cgit v1.2.3-2-g168b From f7d1940a731370ceee6e1c6eaae2cc2f5bab0432 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 19 Oct 2014 18:30:32 -0500 Subject: Remove usage of templates for RSS feeds Signed-off-by: Dan McGee --- feeds.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'feeds.py') diff --git a/feeds.py b/feeds.py index d1836178..0bbac270 100644 --- a/feeds.py +++ b/feeds.py @@ -79,7 +79,6 @@ class PackageFeed(Feed): feed_type = GuidNotPermalinkFeed link = '/packages/' - title_template = 'feeds/packages_title.html' def __call__(self, request, *args, **kwargs): wrapper = condition(etag_func=package_etag, last_modified_func=package_last_modified) @@ -142,6 +141,9 @@ class PackageFeed(Feed): def item_pubdate(self, item): return item.last_update + def item_title(self, item): + return '%s %s %s' % (item.pkgname, item.full_version, item.arch.name) + def item_description(self, item): return item.pkgdesc @@ -168,7 +170,6 @@ class NewsFeed(Feed): link = '/news/' description = 'The latest and greatest news from the Arch Linux distribution.' subtitle = description - description_template = 'feeds/news_description.html' def __call__(self, request, *args, **kwargs): wrapper = condition(etag_func=news_etag, last_modified_func=news_last_modified) @@ -192,6 +193,9 @@ class NewsFeed(Feed): def item_title(self, item): return item.title + def item_description(self, item): + return item.html() + class ReleaseFeed(Feed): feed_type = GuidNotPermalinkFeed -- cgit v1.2.3-2-g168b From 15bb0e7101aa9bfa3f63e8ef44f4a8e1e310e3c1 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 26 Oct 2014 21:44:11 -0500 Subject: Remove Etag header from feeds We have Last-Modified here, and from what I can tell with some more reading and playing with caching, it isn't necessarily wise to set both of them in the same response. Set the one that we actually trust. Signed-off-by: Dan McGee --- feeds.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) (limited to 'feeds.py') diff --git a/feeds.py b/feeds.py index 0bbac270..cb01fbb1 100644 --- a/feeds.py +++ b/feeds.py @@ -1,5 +1,4 @@ from datetime import datetime, time -import hashlib from pytz import utc from django.contrib.sites.models import Site @@ -63,12 +62,6 @@ class GuidNotPermalinkFeed(Rss201rev2Feed): wrapper.flush() -def package_etag(request, *args, **kwargs): - latest = package_last_modified(request) - if latest: - return hashlib.md5(str(kwargs) + str(latest)).hexdigest() - return None - def package_last_modified(request, *args, **kwargs): cursor = connection.cursor() cursor.execute("SELECT MAX(last_update) FROM packages") @@ -81,7 +74,7 @@ class PackageFeed(Feed): link = '/packages/' def __call__(self, request, *args, **kwargs): - wrapper = condition(etag_func=package_etag, last_modified_func=package_last_modified) + wrapper = condition(last_modified_func=package_last_modified) return wrapper(super(PackageFeed, self).__call__)(request, *args, **kwargs) __name__ = 'package_feed' @@ -151,12 +144,6 @@ class PackageFeed(Feed): return (item.repo.name, item.arch.name) -def news_etag(request, *args, **kwargs): - latest = news_last_modified(request) - if latest: - return hashlib.md5(str(latest)).hexdigest() - return None - def news_last_modified(request, *args, **kwargs): cursor = connection.cursor() cursor.execute("SELECT MAX(last_modified) FROM news") @@ -172,7 +159,7 @@ class NewsFeed(Feed): subtitle = description def __call__(self, request, *args, **kwargs): - wrapper = condition(etag_func=news_etag, last_modified_func=news_last_modified) + wrapper = condition(last_modified_func=news_last_modified) return wrapper(super(NewsFeed, self).__call__)(request, *args, **kwargs) __name__ = 'news_feed' -- cgit v1.2.3-2-g168b From 8e990ca945f4fc5b27c0a81a484a107c83aa69d3 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 26 Oct 2014 21:56:24 -0500 Subject: Kill now unneeded workaround for Django issue #9800 Signed-off-by: Dan McGee --- feeds.py | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) (limited to 'feeds.py') diff --git a/feeds.py b/feeds.py index cb01fbb1..24611e7e 100644 --- a/feeds.py +++ b/feeds.py @@ -18,38 +18,20 @@ class BatchWritesWrapper(object): self.outfile = outfile self.chunks = chunks self.buf = [] + def write(self, s): buf = self.buf buf.append(s) if len(buf) >= self.chunks: self.outfile.write(''.join(buf)) self.buf = [] + def flush(self): self.outfile.write(''.join(self.buf)) self.outfile.flush() -class GuidNotPermalinkFeed(Rss201rev2Feed): - @staticmethod - def check_for_unique_id(f): - def wrapper(name, contents=None, attrs=None): - if attrs is None: - attrs = {} - if name == 'guid': - attrs['isPermaLink'] = 'false' - return f(name, contents, attrs) - return wrapper - - def write_items(self, handler): - ''' - Totally disgusting. Monkey-patch the handler so if it sees a - 'unique-id' field come through, add an isPermalink="false" attribute. - Workaround for http://code.djangoproject.com/ticket/9800 - ''' - handler.addQuickElement = self.check_for_unique_id( - handler.addQuickElement) - super(GuidNotPermalinkFeed, self).write_items(handler) - +class FasterRssFeed(Rss201rev2Feed): def write(self, outfile, encoding): ''' Batch the underlying 'write' calls on the outfile because Python's @@ -58,7 +40,7 @@ class GuidNotPermalinkFeed(Rss201rev2Feed): '>' closing tags and over 1600 write calls in our package feed. ''' wrapper = BatchWritesWrapper(outfile) - super(GuidNotPermalinkFeed, self).write(wrapper, encoding) + super(FasterRssFeed, self).write(wrapper, encoding) wrapper.flush() @@ -69,7 +51,7 @@ def package_last_modified(request, *args, **kwargs): class PackageFeed(Feed): - feed_type = GuidNotPermalinkFeed + feed_type = FasterRssFeed link = '/packages/' @@ -124,6 +106,8 @@ class PackageFeed(Feed): def items(self, obj): return obj['qs'] + item_guid_is_permalink = False + def item_guid(self, item): # http://diveintomark.org/archives/2004/05/28/howto-atom-id date = item.last_update @@ -151,7 +135,7 @@ def news_last_modified(request, *args, **kwargs): class NewsFeed(Feed): - feed_type = GuidNotPermalinkFeed + feed_type = FasterRssFeed title = 'Arch Linux: Recent news updates' link = '/news/' @@ -168,6 +152,8 @@ class NewsFeed(Feed): return News.objects.select_related('author').order_by( '-postdate', '-id')[:10] + item_guid_is_permalink = False + def item_guid(self, item): return item.guid @@ -185,7 +171,7 @@ class NewsFeed(Feed): class ReleaseFeed(Feed): - feed_type = GuidNotPermalinkFeed + feed_type = FasterRssFeed title = 'Arch Linux: Releases' link = '/download/' @@ -206,6 +192,8 @@ class ReleaseFeed(Feed): def item_pubdate(self, item): return datetime.combine(item.release_date, time()).replace(tzinfo=utc) + item_guid_is_permalink = False + def item_guid(self, item): # http://diveintomark.org/archives/2004/05/28/howto-atom-id date = item.release_date -- cgit v1.2.3-2-g168b From 9c701ebba1f2ef403aab95354a8ae4efdb7df74c Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 26 Oct 2014 22:06:08 -0500 Subject: Double batch size in BatchWritesWrapper Signed-off-by: Dan McGee --- feeds.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'feeds.py') diff --git a/feeds.py b/feeds.py index 24611e7e..7247e95c 100644 --- a/feeds.py +++ b/feeds.py @@ -14,15 +14,14 @@ from releng.models import Release class BatchWritesWrapper(object): - def __init__(self, outfile, chunks=20): + def __init__(self, outfile): self.outfile = outfile - self.chunks = chunks self.buf = [] def write(self, s): buf = self.buf buf.append(s) - if len(buf) >= self.chunks: + if len(buf) >= 40: self.outfile.write(''.join(buf)) self.buf = [] -- cgit v1.2.3-2-g168b From bd2bc6a1c58723502ef6c2e9f49248908a161b13 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Sun, 26 Oct 2014 22:23:56 -0500 Subject: Add last modified date to releases Signed-off-by: Dan McGee --- feeds.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'feeds.py') diff --git a/feeds.py b/feeds.py index 7247e95c..0f7fa5d7 100644 --- a/feeds.py +++ b/feeds.py @@ -159,6 +159,9 @@ class NewsFeed(Feed): def item_pubdate(self, item): return item.postdate + def item_updateddate(self, item): + return item.last_modified + def item_author_name(self, item): return item.author.get_full_name() @@ -191,6 +194,9 @@ class ReleaseFeed(Feed): def item_pubdate(self, item): return datetime.combine(item.release_date, time()).replace(tzinfo=utc) + def item_updateddate(self, item): + return item.last_modified + item_guid_is_permalink = False def item_guid(self, item): -- cgit v1.2.3-2-g168b