From 9f4902f9c921b82f924fe0af106fa5480ca10ca9 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 7 Apr 2011 14:39:14 -0500 Subject: Ensure feed GUIDs are unchanging and unique Implement 'tag:' style URIs for the GUID field on our RSS feeds. This ensures new package updates show up as new, and we aren't jumping back and forth between generated GUIDs having 'http://' and 'https://' prefixes. Much of the work here is to attempt to keep old news GUIDs constant so we don't once again make everything show up as new in newsreaders. Signed-off-by: Dan McGee --- feeds.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'feeds.py') diff --git a/feeds.py b/feeds.py index cdba9913..d2043271 100644 --- a/feeds.py +++ b/feeds.py @@ -1,6 +1,7 @@ import datetime from decimal import Decimal, ROUND_HALF_DOWN +from django.contrib.sites.models import Site from django.contrib.syndication.views import Feed from django.core.cache import cache from django.db.models import Q @@ -97,9 +98,18 @@ class PackageFeed(Feed): s += '.' return s + subtitle = description + def items(self, obj): return obj['qs'] + def item_guid(self, item): + # http://diveintomark.org/archives/2004/05/28/howto-atom-id + date = item.last_update + return 'tag:%s,%s:%s%s' % (Site.objects.get_current().domain, + date.strftime('%Y-%m-%d'), item.get_absolute_url(), + date.strftime('%Y%m%d%H%M')) + def item_pubdate(self, item): return item.last_update @@ -135,6 +145,7 @@ class NewsFeed(Feed): title = 'Arch Linux: Recent news updates' link = '/news/' description = 'The latest and greatest news from the Arch Linux distribution.' 
+ subtitle = description title_template = 'feeds/news_title.html' description_template = 'feeds/news_description.html' @@ -146,6 +157,9 @@ class NewsFeed(Feed): return News.objects.select_related('author').order_by( '-postdate', '-id')[:10] + def item_guid(self, item): + return item.guid + def item_pubdate(self, item): return item.postdate -- cgit v1.2.3-2-g168b From 0d3e1eb796d673607bb8beb91c61114209fd9155 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 7 Apr 2011 15:07:37 -0500 Subject: Add a horrible hack to allow feed guid value to not be a permalink Django, you make the simplest things so damn hard sometimes. Signed-off-by: Dan McGee --- feeds.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'feeds.py') diff --git a/feeds.py b/feeds.py index d2043271..7a2f2e40 100644 --- a/feeds.py +++ b/feeds.py @@ -5,6 +5,7 @@ from django.contrib.sites.models import Site from django.contrib.syndication.views import Feed from django.core.cache import cache from django.db.models import Q +from django.utils.feedgenerator import Rss201rev2Feed from django.utils.hashcompat import md5_constructor from django.views.decorators.http import condition @@ -13,6 +14,24 @@ from main.utils import CACHE_TIMEOUT, INVALIDATE_TIMEOUT from main.utils import CACHE_PACKAGE_KEY, CACHE_NEWS_KEY from news.models import News +def check_for_unique_id(f): + def wrapper(name, contents=None, attrs=None): + if attrs is None: + attrs = {} + if name == 'guid': + attrs['isPermaLink'] = 'false' + return f(name, contents, attrs) + return wrapper + +class GuidNotPermalinkFeed(Rss201rev2Feed): + def write_items(self, handler): + # Totally disgusting. Monkey-patch the handler so if it sees a + # 'unique-id' field come through, add an isPermaLink="false" attribute. 
+ # Workaround for http://code.djangoproject.com/ticket/9800 + handler.addQuickElement = check_for_unique_id(handler.addQuickElement) + super(GuidNotPermalinkFeed, self).write_items(handler) + + def utc_offset(): '''Calculate the UTC offset from local time. Useful for converting values stored in local time to things like cache last modifed headers.''' @@ -53,6 +72,8 @@ def package_last_modified(request, *args, **kwargs): return retrieve_package_latest() class PackageFeed(Feed): + feed_type = GuidNotPermalinkFeed + link = '/packages/' title_template = 'feeds/packages_title.html' description_template = 'feeds/packages_description.html' @@ -142,6 +163,8 @@ def news_last_modified(request, *args, **kwargs): return retrieve_news_latest() class NewsFeed(Feed): + feed_type = GuidNotPermalinkFeed + title = 'Arch Linux: Recent news updates' link = '/news/' description = 'The latest and greatest news from the Arch Linux distribution.' -- cgit v1.2.3-2-g168b From 01db07bad844e17e084f650b6732647f77a91c5c Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 7 Apr 2011 15:39:53 -0500 Subject: Use UTC datetime objects everywhere Rather than the twisted mix of local times and UTC times we currently have. 
Signed-off-by: Dan McGee --- feeds.py | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'feeds.py') diff --git a/feeds.py b/feeds.py index 7a2f2e40..0be12531 100644 --- a/feeds.py +++ b/feeds.py @@ -1,5 +1,4 @@ -import datetime -from decimal import Decimal, ROUND_HALF_DOWN +import pytz from django.contrib.sites.models import Site from django.contrib.syndication.views import Feed @@ -10,7 +9,7 @@ from django.utils.hashcompat import md5_constructor from django.views.decorators.http import condition from main.models import Arch, Repo, Package -from main.utils import CACHE_TIMEOUT, INVALIDATE_TIMEOUT +from main.utils import CACHE_TIMEOUT from main.utils import CACHE_PACKAGE_KEY, CACHE_NEWS_KEY from news.models import News @@ -32,17 +31,6 @@ class GuidNotPermalinkFeed(Rss201rev2Feed): super(GuidNotPermalinkFeed, self).write_items(handler) -def utc_offset(): - '''Calculate the UTC offset from local time. Useful for converting values - stored in local time to things like cache last modifed headers.''' - timediff = datetime.datetime.utcnow() - datetime.datetime.now() - secs = timediff.days * 86400 + timediff.seconds - # round to nearest minute - mins = Decimal(secs) / Decimal(60) - mins = mins.quantize(Decimal('0'), rounding=ROUND_HALF_DOWN) - return datetime.timedelta(minutes=int(mins)) - - def retrieve_package_latest(): # we could break this down based on the request url, but it would probably # cost us more in query time to do so. @@ -52,7 +40,6 @@ def retrieve_package_latest(): try: latest = Package.objects.values('last_update').latest( 'last_update')['last_update'] - latest = latest + utc_offset() # Using add means "don't overwrite anything in there". What could be in # there is an explicit None value that our refresh signal set, which # means we want to avoid race condition possibilities for a bit. 
@@ -132,7 +119,7 @@ class PackageFeed(Feed): date.strftime('%Y%m%d%H%M')) def item_pubdate(self, item): - return item.last_update + return item.last_update.replace(tzinfo=pytz.utc) def item_categories(self, item): return (item.repo.name, item.arch.name) @@ -145,7 +132,6 @@ def retrieve_news_latest(): try: latest = News.objects.values('last_modified').latest( 'last_modified')['last_modified'] - latest = latest + utc_offset() # same thoughts apply as in retrieve_package_latest cache.add(CACHE_NEWS_KEY, latest, CACHE_TIMEOUT) return latest @@ -184,7 +170,7 @@ class NewsFeed(Feed): return item.guid def item_pubdate(self, item): - return item.postdate + return item.postdate.replace(tzinfo=pytz.utc) def item_author_name(self, item): return item.author.get_full_name() -- cgit v1.2.3-2-g168b From 77842a6c76095277b024505708bf528d455b9c89 Mon Sep 17 00:00:00 2001 From: Dan McGee Date: Thu, 7 Apr 2011 16:52:52 -0500 Subject: Consolidate caching black magic Get the stuff used to retrieve and refresh the latest date values all in the same place, and make it a bit more beautiful by refactoring it all into a common set of methods. 
Signed-off-by: Dan McGee --- feeds.py | 44 +++++--------------------------------------- 1 file changed, 5 insertions(+), 39 deletions(-) (limited to 'feeds.py') diff --git a/feeds.py b/feeds.py index 0be12531..269d0a38 100644 --- a/feeds.py +++ b/feeds.py @@ -2,15 +2,13 @@ import pytz from django.contrib.sites.models import Site from django.contrib.syndication.views import Feed -from django.core.cache import cache from django.db.models import Q from django.utils.feedgenerator import Rss201rev2Feed from django.utils.hashcompat import md5_constructor from django.views.decorators.http import condition +from main.utils import retrieve_latest from main.models import Arch, Repo, Package -from main.utils import CACHE_TIMEOUT -from main.utils import CACHE_PACKAGE_KEY, CACHE_NEWS_KEY from news.models import News def check_for_unique_id(f): @@ -31,32 +29,14 @@ class GuidNotPermalinkFeed(Rss201rev2Feed): super(GuidNotPermalinkFeed, self).write_items(handler) -def retrieve_package_latest(): - # we could break this down based on the request url, but it would probably - # cost us more in query time to do so. - latest = cache.get(CACHE_PACKAGE_KEY) - if latest: - return latest - try: - latest = Package.objects.values('last_update').latest( - 'last_update')['last_update'] - # Using add means "don't overwrite anything in there". What could be in - # there is an explicit None value that our refresh signal set, which - # means we want to avoid race condition possibilities for a bit. 
- cache.add(CACHE_PACKAGE_KEY, latest, CACHE_TIMEOUT) - return latest - except Package.DoesNotExist: - pass - return None - def package_etag(request, *args, **kwargs): - latest = retrieve_package_latest() + latest = retrieve_latest(Package) if latest: return md5_constructor(str(kwargs) + str(latest)).hexdigest() return None def package_last_modified(request, *args, **kwargs): - return retrieve_package_latest() + return retrieve_latest(Package) class PackageFeed(Feed): feed_type = GuidNotPermalinkFeed @@ -125,28 +105,14 @@ class PackageFeed(Feed): return (item.repo.name, item.arch.name) -def retrieve_news_latest(): - latest = cache.get(CACHE_NEWS_KEY) - if latest: - return latest - try: - latest = News.objects.values('last_modified').latest( - 'last_modified')['last_modified'] - # same thoughts apply as in retrieve_package_latest - cache.add(CACHE_NEWS_KEY, latest, CACHE_TIMEOUT) - return latest - except News.DoesNotExist: - pass - return None - def news_etag(request, *args, **kwargs): - latest = retrieve_news_latest() + latest = retrieve_latest(News) if latest: return md5_constructor(str(latest)).hexdigest() return None def news_last_modified(request, *args, **kwargs): - return retrieve_news_latest() + return retrieve_latest(News) class NewsFeed(Feed): feed_type = GuidNotPermalinkFeed -- cgit v1.2.3-2-g168b