web/lib/django/utils/feedgenerator.py
changeset 38 77b6da96e6f1
equal deleted inserted replaced
37:8d941af65caf 38:77b6da96e6f1
       
     1 """
       
     2 Syndication feed generation library -- used for generating RSS, etc.
       
     3 
       
     4 Sample usage:
       
     5 
       
     6 >>> from django.utils import feedgenerator
       
     7 >>> feed = feedgenerator.Rss201rev2Feed(
       
     8 ...     title=u"Poynter E-Media Tidbits",
       
     9 ...     link=u"http://www.poynter.org/column.asp?id=31",
       
    10 ...     description=u"A group weblog by the sharpest minds in online media/journalism/publishing.",
       
    11 ...     language=u"en",
       
    12 ... )
       
    13 >>> feed.add_item(
       
    14 ...     title="Hello",
       
    15 ...     link=u"http://www.holovaty.com/test/",
       
    16 ...     description="Testing."
       
    17 ... )
       
    18 >>> fp = open('test.rss', 'w')
       
    19 >>> feed.write(fp, 'utf-8')
       
    20 >>> fp.close()
       
    21 
       
    22 For definitions of the different versions of RSS, see:
       
    23 http://diveintomark.org/archives/2004/02/04/incompatible-rss
       
    24 """
       
    25 
       
    26 import datetime
       
    27 import urlparse
       
    28 from django.utils.xmlutils import SimplerXMLGenerator
       
    29 from django.utils.encoding import force_unicode, iri_to_uri
       
    30 
       
    31 def rfc2822_date(date):
       
    32     # We do this ourselves to be timezone aware, email.Utils is not tz aware.
       
    33     if date.tzinfo:
       
    34         time_str = date.strftime('%a, %d %b %Y %H:%M:%S ')
       
    35         offset = date.tzinfo.utcoffset(date)
       
    36         timezone = (offset.days * 24 * 60) + (offset.seconds / 60)
       
    37         hour, minute = divmod(timezone, 60)
       
    38         return time_str + "%+03d%02d" % (hour, minute)
       
    39     else:
       
    40         return date.strftime('%a, %d %b %Y %H:%M:%S -0000')
       
    41 
       
    42 def rfc3339_date(date):
       
    43     if date.tzinfo:
       
    44         time_str = date.strftime('%Y-%m-%dT%H:%M:%S')
       
    45         offset = date.tzinfo.utcoffset(date)
       
    46         timezone = (offset.days * 24 * 60) + (offset.seconds / 60)
       
    47         hour, minute = divmod(timezone, 60)
       
    48         return time_str + "%+03d:%02d" % (hour, minute)
       
    49     else:
       
    50         return date.strftime('%Y-%m-%dT%H:%M:%SZ')
       
    51 
       
    52 def get_tag_uri(url, date):
       
    53     """
       
    54     Creates a TagURI.
       
    55 
       
    56     See http://diveintomark.org/archives/2004/05/28/howto-atom-id
       
    57     """
       
    58     url_split = urlparse.urlparse(url)
       
    59 
       
    60     # Python 2.4 didn't have named attributes on split results or the hostname.
       
    61     hostname = getattr(url_split, 'hostname', url_split[1].split(':')[0])
       
    62     path = url_split[2]
       
    63     fragment = url_split[5]
       
    64 
       
    65     d = ''
       
    66     if date is not None:
       
    67         d = ',%s' % date.strftime('%Y-%m-%d')
       
    68     return u'tag:%s%s:%s/%s' % (hostname, d, path, fragment)
       
    69 
       
    70 class SyndicationFeed(object):
       
    71     "Base class for all syndication feeds. Subclasses should provide write()"
       
    72     def __init__(self, title, link, description, language=None, author_email=None,
       
    73             author_name=None, author_link=None, subtitle=None, categories=None,
       
    74             feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs):
       
    75         to_unicode = lambda s: force_unicode(s, strings_only=True)
       
    76         if categories:
       
    77             categories = [force_unicode(c) for c in categories]
       
    78         if ttl is not None:
       
    79             # Force ints to unicode
       
    80             ttl = force_unicode(ttl)
       
    81         self.feed = {
       
    82             'title': to_unicode(title),
       
    83             'link': iri_to_uri(link),
       
    84             'description': to_unicode(description),
       
    85             'language': to_unicode(language),
       
    86             'author_email': to_unicode(author_email),
       
    87             'author_name': to_unicode(author_name),
       
    88             'author_link': iri_to_uri(author_link),
       
    89             'subtitle': to_unicode(subtitle),
       
    90             'categories': categories or (),
       
    91             'feed_url': iri_to_uri(feed_url),
       
    92             'feed_copyright': to_unicode(feed_copyright),
       
    93             'id': feed_guid or link,
       
    94             'ttl': ttl,
       
    95         }
       
    96         self.feed.update(kwargs)
       
    97         self.items = []
       
    98 
       
    99     def add_item(self, title, link, description, author_email=None,
       
   100         author_name=None, author_link=None, pubdate=None, comments=None,
       
   101         unique_id=None, enclosure=None, categories=(), item_copyright=None,
       
   102         ttl=None, **kwargs):
       
   103         """
       
   104         Adds an item to the feed. All args are expected to be Python Unicode
       
   105         objects except pubdate, which is a datetime.datetime object, and
       
   106         enclosure, which is an instance of the Enclosure class.
       
   107         """
       
   108         to_unicode = lambda s: force_unicode(s, strings_only=True)
       
   109         if categories:
       
   110             categories = [to_unicode(c) for c in categories]
       
   111         if ttl is not None:
       
   112             # Force ints to unicode
       
   113             ttl = force_unicode(ttl)
       
   114         item = {
       
   115             'title': to_unicode(title),
       
   116             'link': iri_to_uri(link),
       
   117             'description': to_unicode(description),
       
   118             'author_email': to_unicode(author_email),
       
   119             'author_name': to_unicode(author_name),
       
   120             'author_link': iri_to_uri(author_link),
       
   121             'pubdate': pubdate,
       
   122             'comments': to_unicode(comments),
       
   123             'unique_id': to_unicode(unique_id),
       
   124             'enclosure': enclosure,
       
   125             'categories': categories or (),
       
   126             'item_copyright': to_unicode(item_copyright),
       
   127             'ttl': ttl,
       
   128         }
       
   129         item.update(kwargs)
       
   130         self.items.append(item)
       
   131 
       
   132     def num_items(self):
       
   133         return len(self.items)
       
   134 
       
   135     def root_attributes(self):
       
   136         """
       
   137         Return extra attributes to place on the root (i.e. feed/channel) element.
       
   138         Called from write().
       
   139         """
       
   140         return {}
       
   141 
       
   142     def add_root_elements(self, handler):
       
   143         """
       
   144         Add elements in the root (i.e. feed/channel) element. Called
       
   145         from write().
       
   146         """
       
   147         pass
       
   148 
       
   149     def item_attributes(self, item):
       
   150         """
       
   151         Return extra attributes to place on each item (i.e. item/entry) element.
       
   152         """
       
   153         return {}
       
   154 
       
   155     def add_item_elements(self, handler, item):
       
   156         """
       
   157         Add elements on each item (i.e. item/entry) element.
       
   158         """
       
   159         pass
       
   160 
       
   161     def write(self, outfile, encoding):
       
   162         """
       
   163         Outputs the feed in the given encoding to outfile, which is a file-like
       
   164         object. Subclasses should override this.
       
   165         """
       
   166         raise NotImplementedError
       
   167 
       
   168     def writeString(self, encoding):
       
   169         """
       
   170         Returns the feed in the given encoding as a string.
       
   171         """
       
   172         from StringIO import StringIO
       
   173         s = StringIO()
       
   174         self.write(s, encoding)
       
   175         return s.getvalue()
       
   176 
       
   177     def latest_post_date(self):
       
   178         """
       
   179         Returns the latest item's pubdate. If none of them have a pubdate,
       
   180         this returns the current date/time.
       
   181         """
       
   182         updates = [i['pubdate'] for i in self.items if i['pubdate'] is not None]
       
   183         if len(updates) > 0:
       
   184             updates.sort()
       
   185             return updates[-1]
       
   186         else:
       
   187             return datetime.datetime.now()
       
   188 
       
   189 class Enclosure(object):
       
   190     "Represents an RSS enclosure"
       
   191     def __init__(self, url, length, mime_type):
       
   192         "All args are expected to be Python Unicode objects"
       
   193         self.length, self.mime_type = length, mime_type
       
   194         self.url = iri_to_uri(url)
       
   195 
       
   196 class RssFeed(SyndicationFeed):
       
   197     mime_type = 'application/rss+xml'
       
   198     def write(self, outfile, encoding):
       
   199         handler = SimplerXMLGenerator(outfile, encoding)
       
   200         handler.startDocument()
       
   201         handler.startElement(u"rss", self.rss_attributes())
       
   202         handler.startElement(u"channel", self.root_attributes())
       
   203         self.add_root_elements(handler)
       
   204         self.write_items(handler)
       
   205         self.endChannelElement(handler)
       
   206         handler.endElement(u"rss")
       
   207 
       
   208     def rss_attributes(self):
       
   209         return {u"version": self._version,
       
   210                 u"xmlns:atom": u"http://www.w3.org/2005/Atom"}
       
   211 
       
   212     def write_items(self, handler):
       
   213         for item in self.items:
       
   214             handler.startElement(u'item', self.item_attributes(item))
       
   215             self.add_item_elements(handler, item)
       
   216             handler.endElement(u"item")
       
   217 
       
   218     def add_root_elements(self, handler):
       
   219         handler.addQuickElement(u"title", self.feed['title'])
       
   220         handler.addQuickElement(u"link", self.feed['link'])
       
   221         handler.addQuickElement(u"description", self.feed['description'])
       
   222         handler.addQuickElement(u"atom:link", None, {u"rel": u"self", u"href": self.feed['feed_url']})
       
   223         if self.feed['language'] is not None:
       
   224             handler.addQuickElement(u"language", self.feed['language'])
       
   225         for cat in self.feed['categories']:
       
   226             handler.addQuickElement(u"category", cat)
       
   227         if self.feed['feed_copyright'] is not None:
       
   228             handler.addQuickElement(u"copyright", self.feed['feed_copyright'])
       
   229         handler.addQuickElement(u"lastBuildDate", rfc2822_date(self.latest_post_date()).decode('utf-8'))
       
   230         if self.feed['ttl'] is not None:
       
   231             handler.addQuickElement(u"ttl", self.feed['ttl'])
       
   232 
       
   233     def endChannelElement(self, handler):
       
   234         handler.endElement(u"channel")
       
   235 
       
   236 class RssUserland091Feed(RssFeed):
       
   237     _version = u"0.91"
       
   238     def add_item_elements(self, handler, item):
       
   239         handler.addQuickElement(u"title", item['title'])
       
   240         handler.addQuickElement(u"link", item['link'])
       
   241         if item['description'] is not None:
       
   242             handler.addQuickElement(u"description", item['description'])
       
   243 
       
   244 class Rss201rev2Feed(RssFeed):
       
   245     # Spec: http://blogs.law.harvard.edu/tech/rss
       
   246     _version = u"2.0"
       
   247     def add_item_elements(self, handler, item):
       
   248         handler.addQuickElement(u"title", item['title'])
       
   249         handler.addQuickElement(u"link", item['link'])
       
   250         if item['description'] is not None:
       
   251             handler.addQuickElement(u"description", item['description'])
       
   252 
       
   253         # Author information.
       
   254         if item["author_name"] and item["author_email"]:
       
   255             handler.addQuickElement(u"author", "%s (%s)" % \
       
   256                 (item['author_email'], item['author_name']))
       
   257         elif item["author_email"]:
       
   258             handler.addQuickElement(u"author", item["author_email"])
       
   259         elif item["author_name"]:
       
   260             handler.addQuickElement(u"dc:creator", item["author_name"], {u"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})
       
   261 
       
   262         if item['pubdate'] is not None:
       
   263             handler.addQuickElement(u"pubDate", rfc2822_date(item['pubdate']).decode('utf-8'))
       
   264         if item['comments'] is not None:
       
   265             handler.addQuickElement(u"comments", item['comments'])
       
   266         if item['unique_id'] is not None:
       
   267             handler.addQuickElement(u"guid", item['unique_id'])
       
   268         if item['ttl'] is not None:
       
   269             handler.addQuickElement(u"ttl", item['ttl'])
       
   270 
       
   271         # Enclosure.
       
   272         if item['enclosure'] is not None:
       
   273             handler.addQuickElement(u"enclosure", '',
       
   274                 {u"url": item['enclosure'].url, u"length": item['enclosure'].length,
       
   275                     u"type": item['enclosure'].mime_type})
       
   276 
       
   277         # Categories.
       
   278         for cat in item['categories']:
       
   279             handler.addQuickElement(u"category", cat)
       
   280 
       
   281 class Atom1Feed(SyndicationFeed):
       
   282     # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
       
   283     mime_type = 'application/atom+xml'
       
   284     ns = u"http://www.w3.org/2005/Atom"
       
   285 
       
   286     def write(self, outfile, encoding):
       
   287         handler = SimplerXMLGenerator(outfile, encoding)
       
   288         handler.startDocument()
       
   289         handler.startElement(u'feed', self.root_attributes())
       
   290         self.add_root_elements(handler)
       
   291         self.write_items(handler)
       
   292         handler.endElement(u"feed")
       
   293 
       
   294     def root_attributes(self):
       
   295         if self.feed['language'] is not None:
       
   296             return {u"xmlns": self.ns, u"xml:lang": self.feed['language']}
       
   297         else:
       
   298             return {u"xmlns": self.ns}
       
   299 
       
   300     def add_root_elements(self, handler):
       
   301         handler.addQuickElement(u"title", self.feed['title'])
       
   302         handler.addQuickElement(u"link", "", {u"rel": u"alternate", u"href": self.feed['link']})
       
   303         if self.feed['feed_url'] is not None:
       
   304             handler.addQuickElement(u"link", "", {u"rel": u"self", u"href": self.feed['feed_url']})
       
   305         handler.addQuickElement(u"id", self.feed['id'])
       
   306         handler.addQuickElement(u"updated", rfc3339_date(self.latest_post_date()).decode('utf-8'))
       
   307         if self.feed['author_name'] is not None:
       
   308             handler.startElement(u"author", {})
       
   309             handler.addQuickElement(u"name", self.feed['author_name'])
       
   310             if self.feed['author_email'] is not None:
       
   311                 handler.addQuickElement(u"email", self.feed['author_email'])
       
   312             if self.feed['author_link'] is not None:
       
   313                 handler.addQuickElement(u"uri", self.feed['author_link'])
       
   314             handler.endElement(u"author")
       
   315         if self.feed['subtitle'] is not None:
       
   316             handler.addQuickElement(u"subtitle", self.feed['subtitle'])
       
   317         for cat in self.feed['categories']:
       
   318             handler.addQuickElement(u"category", "", {u"term": cat})
       
   319         if self.feed['feed_copyright'] is not None:
       
   320             handler.addQuickElement(u"rights", self.feed['feed_copyright'])
       
   321 
       
   322     def write_items(self, handler):
       
   323         for item in self.items:
       
   324             handler.startElement(u"entry", self.item_attributes(item))
       
   325             self.add_item_elements(handler, item)
       
   326             handler.endElement(u"entry")
       
   327 
       
   328     def add_item_elements(self, handler, item):
       
   329         handler.addQuickElement(u"title", item['title'])
       
   330         handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"alternate"})
       
   331         if item['pubdate'] is not None:
       
   332             handler.addQuickElement(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))
       
   333 
       
   334         # Author information.
       
   335         if item['author_name'] is not None:
       
   336             handler.startElement(u"author", {})
       
   337             handler.addQuickElement(u"name", item['author_name'])
       
   338             if item['author_email'] is not None:
       
   339                 handler.addQuickElement(u"email", item['author_email'])
       
   340             if item['author_link'] is not None:
       
   341                 handler.addQuickElement(u"uri", item['author_link'])
       
   342             handler.endElement(u"author")
       
   343 
       
   344         # Unique ID.
       
   345         if item['unique_id'] is not None:
       
   346             unique_id = item['unique_id']
       
   347         else:
       
   348             unique_id = get_tag_uri(item['link'], item['pubdate'])
       
   349         handler.addQuickElement(u"id", unique_id)
       
   350 
       
   351         # Summary.
       
   352         if item['description'] is not None:
       
   353             handler.addQuickElement(u"summary", item['description'], {u"type": u"html"})
       
   354 
       
   355         # Enclosure.
       
   356         if item['enclosure'] is not None:
       
   357             handler.addQuickElement(u"link", '',
       
   358                 {u"rel": u"enclosure",
       
   359                  u"href": item['enclosure'].url,
       
   360                  u"length": item['enclosure'].length,
       
   361                  u"type": item['enclosure'].mime_type})
       
   362 
       
   363         # Categories.
       
   364         for cat in item['categories']:
       
   365             handler.addQuickElement(u"category", u"", {u"term": cat})
       
   366 
       
   367         # Rights.
       
   368         if item['item_copyright'] is not None:
       
   369             handler.addQuickElement(u"rights", item['item_copyright'])
       
   370 
       
# This isolates the decision of what the system default is, so calling code can
# do "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed".
# The default is currently the RSS 2.0 generator.
DefaultFeed = Rss201rev2Feed