|
1 """ |
|
2 Syndication feed generation library -- used for generating RSS, etc. |
|
3 |
|
4 Sample usage: |
|
5 |
|
6 >>> from django.utils import feedgenerator |
|
7 >>> feed = feedgenerator.Rss201rev2Feed( |
|
8 ... title=u"Poynter E-Media Tidbits", |
|
9 ... link=u"http://www.poynter.org/column.asp?id=31", |
|
10 ... description=u"A group weblog by the sharpest minds in online media/journalism/publishing.", |
|
11 ... language=u"en", |
|
12 ... ) |
|
13 >>> feed.add_item( |
|
14 ... title="Hello", |
|
15 ... link=u"http://www.holovaty.com/test/", |
|
16 ... description="Testing." |
|
17 ... ) |
|
18 >>> fp = open('test.rss', 'w') |
|
19 >>> feed.write(fp, 'utf-8') |
|
20 >>> fp.close() |
|
21 |
|
22 For definitions of the different versions of RSS, see: |
|
23 http://diveintomark.org/archives/2004/02/04/incompatible-rss |
|
24 """ |
|
25 |
|
26 import datetime |
|
27 import urlparse |
|
28 from django.utils.xmlutils import SimplerXMLGenerator |
|
29 from django.utils.encoding import force_unicode, iri_to_uri |
|
30 |
|
def rfc2822_date(date):
    """
    Formats a datetime as an RFC 2822 date-time string, for example
    "Fri, 14 Nov 2008 13:37:00 -0330".

    Aware datetimes get their own UTC offset appended. Naive datetimes (and
    tzinfo objects whose utcoffset() returns None) get "-0000".
    """
    # strftime()'s %a and %b produce locale-dependent names, but RFC 2822
    # requires the English abbreviations, so map them by hand.
    days = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    dow = days[date.weekday()]
    month = months[date.month - 1]
    time_str = date.strftime('%s, %%d %s %%Y %%H:%%M:%%S ' % (dow, month))

    # We do this ourselves to be timezone aware, email.Utils is not tz aware.
    if date.tzinfo:
        offset = date.tzinfo.utcoffset(date)
    else:
        offset = None
    if offset is None:
        return time_str + '-0000'

    # Whole minutes east of UTC (negative when west of UTC).
    timezone = (offset.days * 24 * 60) + (offset.seconds // 60)
    # Format the sign and the magnitude separately: divmod() on a negative
    # total floors the hour, which turned -03:30 into "-0430".
    if timezone < 0:
        sign = '-'
    else:
        sign = '+'
    hour, minute = divmod(abs(timezone), 60)
    return time_str + "%s%02d%02d" % (sign, hour, minute)
|
41 |
|
def rfc3339_date(date):
    """
    Formats a datetime as an RFC 3339 timestamp, for example
    "2008-11-14T13:37:00-03:30".

    Aware datetimes get their own UTC offset appended as "+HH:MM" or
    "-HH:MM". Naive datetimes (and tzinfo objects whose utcoffset() returns
    None) get the "Z" suffix.
    """
    time_str = date.strftime('%Y-%m-%dT%H:%M:%S')

    if date.tzinfo:
        offset = date.tzinfo.utcoffset(date)
    else:
        offset = None
    if offset is None:
        return time_str + 'Z'

    # Whole minutes east of UTC (negative when west of UTC).
    timezone = (offset.days * 24 * 60) + (offset.seconds // 60)
    # Format the sign and the magnitude separately: divmod() on a negative
    # total floors the hour, which turned -03:30 into "-04:30".
    if timezone < 0:
        sign = '-'
    else:
        sign = '+'
    hour, minute = divmod(abs(timezone), 60)
    return time_str + "%s%02d:%02d" % (sign, hour, minute)
|
51 |
|
def get_tag_uri(url, date):
    """
    Builds a TagURI from a URL and an optional date.

    The result has the form "tag:<host>[,<YYYY-MM-DD>]:<path>/<fragment>";
    the date part is left out when date is None.

    See http://diveintomark.org/archives/2004/05/28/howto-atom-id
    """
    parts = urlparse.urlparse(url)

    # Python 2.4 didn't have named attributes on split results or the
    # hostname, so fall back to the netloc with any ":port" stripped.
    netloc_host = parts[1].split(':')[0]
    hostname = getattr(parts, 'hostname', netloc_host)

    if date is None:
        date_part = ''
    else:
        date_part = ',%s' % date.strftime('%Y-%m-%d')

    # Indexes 2 and 5 of the parse result are the path and the fragment.
    return u'tag:%s%s:%s/%s' % (hostname, date_part, parts[2], parts[5])
|
69 |
|
class SyndicationFeed(object):
    "Base class for all syndication feeds. Subclasses should provide write()"

    def __init__(self, title, link, description, language=None, author_email=None,
            author_name=None, author_link=None, subtitle=None, categories=None,
            feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs):
        """
        Stores the feed-level metadata in self.feed and starts the feed with
        an empty list of items.

        Text values go through force_unicode() and links through
        iri_to_uri(). Extra keyword arguments are merged into self.feed
        last, so they can add keys or override the standard ones.
        """
        def to_unicode(value):
            # NOTE(review): strings_only=True presumably leaves non-string
            # values such as None untouched -- confirm in force_unicode.
            return force_unicode(value, strings_only=True)

        if categories:
            categories = [force_unicode(category) for category in categories]
        if ttl is not None:
            # Force ints to unicode
            ttl = force_unicode(ttl)

        feed = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'language': to_unicode(language),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'subtitle': to_unicode(subtitle),
            'categories': categories or (),
            'feed_url': iri_to_uri(feed_url),
            'feed_copyright': to_unicode(feed_copyright),
            # The feed id falls back to the (unconverted) link argument.
            'id': feed_guid or link,
            'ttl': ttl,
        }
        feed.update(kwargs)
        self.feed = feed
        self.items = []

    def add_item(self, title, link, description, author_email=None,
            author_name=None, author_link=None, pubdate=None, comments=None,
            unique_id=None, enclosure=None, categories=(), item_copyright=None,
            ttl=None, **kwargs):
        """
        Appends one item to the feed. Every argument should be a Python
        Unicode object, with two exceptions: pubdate is a datetime.datetime
        object and enclosure is an instance of the Enclosure class.

        Extra keyword arguments are merged into the item's dictionary last.
        """
        def to_unicode(value):
            return force_unicode(value, strings_only=True)

        if categories:
            categories = [to_unicode(category) for category in categories]
        if ttl is not None:
            # Force ints to unicode
            ttl = force_unicode(ttl)

        entry = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'pubdate': pubdate,
            'comments': to_unicode(comments),
            'unique_id': to_unicode(unique_id),
            'enclosure': enclosure,
            'categories': categories or (),
            'item_copyright': to_unicode(item_copyright),
            'ttl': ttl,
        }
        entry.update(kwargs)
        self.items.append(entry)

    def num_items(self):
        "Returns how many items have been added to the feed."
        return len(self.items)

    def root_attributes(self):
        """
        Hook: extra attributes for the root (i.e. feed/channel) element.
        Called from write(). The base implementation supplies none.
        """
        return {}

    def add_root_elements(self, handler):
        """
        Hook: write the child elements of the root (i.e. feed/channel)
        element. Called from write(). The base implementation writes nothing.
        """
        pass

    def item_attributes(self, item):
        """
        Hook: extra attributes for each item (i.e. item/entry) element.
        The base implementation supplies none.
        """
        return {}

    def add_item_elements(self, handler, item):
        """
        Hook: write the child elements of each item (i.e. item/entry)
        element. The base implementation writes nothing.
        """
        pass

    def write(self, outfile, encoding):
        """
        Writes the feed, in the given encoding, to the file-like object
        outfile. Subclasses should override this; the base implementation
        raises NotImplementedError.
        """
        raise NotImplementedError

    def writeString(self, encoding):
        """
        Renders the feed in the given encoding and returns it as a string.
        """
        from StringIO import StringIO
        buf = StringIO()
        self.write(buf, encoding)
        return buf.getvalue()

    def latest_post_date(self):
        """
        Returns the most recent pubdate among the items. When no item has a
        pubdate, the current date/time is returned instead.
        """
        dates = [item['pubdate'] for item in self.items
                 if item['pubdate'] is not None]
        if not dates:
            return datetime.datetime.now()
        return sorted(dates)[-1]
|
188 |
|
class Enclosure(object):
    "Represents an RSS enclosure"

    def __init__(self, url, length, mime_type):
        "All args are expected to be Python Unicode objects"
        # length and mime_type are stored as given; only the URL is
        # converted, via iri_to_uri().
        self.length = length
        self.mime_type = mime_type
        self.url = iri_to_uri(url)
|
195 |
|
class RssFeed(SyndicationFeed):
    """
    Common base for the RSS feed flavours. Writes the <rss>/<channel>
    skeleton; subclasses supply _version (read by rss_attributes()) and
    the per-item elements via add_item_elements().
    """
    mime_type = 'application/rss+xml'

    def write(self, outfile, encoding):
        """
        Writes the whole RSS document, in the given encoding, to the
        file-like object outfile.
        """
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()
        handler.startElement(u"rss", self.rss_attributes())
        handler.startElement(u"channel", self.root_attributes())
        self.add_root_elements(handler)
        self.write_items(handler)
        self.endChannelElement(handler)
        handler.endElement(u"rss")

    def rss_attributes(self):
        """
        Returns the attributes of the <rss> element: the RSS version and
        the Atom namespace used by the channel's atom:link element.
        """
        return {u"version": self._version,
                u"xmlns:atom": u"http://www.w3.org/2005/Atom"}

    def write_items(self, handler):
        "Writes one <item> element per feed item."
        for item in self.items:
            handler.startElement(u'item', self.item_attributes(item))
            self.add_item_elements(handler, item)
            handler.endElement(u"item")

    def add_root_elements(self, handler):
        """
        Writes the channel-level elements: title, link, description, the
        optional atom:link/language/category/copyright elements,
        lastBuildDate and the optional ttl.
        """
        handler.addQuickElement(u"title", self.feed['title'])
        handler.addQuickElement(u"link", self.feed['link'])
        handler.addQuickElement(u"description", self.feed['description'])
        # feed_url is optional (it defaults to None), so only advertise the
        # feed's own address when one was given instead of emitting an
        # atom:link whose href is None.
        if self.feed['feed_url'] is not None:
            handler.addQuickElement(u"atom:link", None, {u"rel": u"self", u"href": self.feed['feed_url']})
        if self.feed['language'] is not None:
            handler.addQuickElement(u"language", self.feed['language'])
        for cat in self.feed['categories']:
            handler.addQuickElement(u"category", cat)
        if self.feed['feed_copyright'] is not None:
            handler.addQuickElement(u"copyright", self.feed['feed_copyright'])
        handler.addQuickElement(u"lastBuildDate", rfc2822_date(self.latest_post_date()).decode('utf-8'))
        if self.feed['ttl'] is not None:
            handler.addQuickElement(u"ttl", self.feed['ttl'])

    def endChannelElement(self, handler):
        "Closes the <channel> element opened by write()."
        handler.endElement(u"channel")
|
235 |
|
class RssUserland091Feed(RssFeed):
    "RSS 0.91 feed: each item carries only a title, a link and an optional description."
    _version = u"0.91"

    def add_item_elements(self, handler, item):
        "Writes the title, link and (when present) description of one item."
        add = handler.addQuickElement
        add(u"title", item['title'])
        add(u"link", item['link'])
        description = item['description']
        if description is not None:
            add(u"description", description)
|
243 |
|
class Rss201rev2Feed(RssFeed):
    # Spec: http://blogs.law.harvard.edu/tech/rss
    _version = u"2.0"

    def add_item_elements(self, handler, item):
        """
        Writes the child elements of one RSS 2.0 <item>: title, link,
        description, author, pubDate, comments, guid, ttl, enclosure and
        categories. Optional values are skipped when they are not set.
        """
        add = handler.addQuickElement
        add(u"title", item['title'])
        add(u"link", item['link'])
        if item['description'] is not None:
            add(u"description", item['description'])

        # Author information: <author> is only written when an e-mail
        # address is available; a name on its own goes into dc:creator.
        name = item["author_name"]
        email = item["author_email"]
        if email:
            if name:
                add(u"author", "%s (%s)" % (email, name))
            else:
                add(u"author", email)
        elif name:
            add(u"dc:creator", name, {u"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})

        if item['pubdate'] is not None:
            add(u"pubDate", rfc2822_date(item['pubdate']).decode('utf-8'))

        # Plain optional elements, written in this order when set.
        optional = (
            (u"comments", 'comments'),
            (u"guid", 'unique_id'),
            (u"ttl", 'ttl'),
        )
        for tag, key in optional:
            if item[key] is not None:
                add(tag, item[key])

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            enclosure_attrs = {
                u"url": enclosure.url,
                u"length": enclosure.length,
                u"type": enclosure.mime_type,
            }
            add(u"enclosure", '', enclosure_attrs)

        # Categories.
        for category in item['categories']:
            add(u"category", category)
|
280 |
|
class Atom1Feed(SyndicationFeed):
    # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
    mime_type = 'application/atom+xml'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        """
        Writes the whole Atom document, in the given encoding, to the
        file-like object outfile.
        """
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()
        handler.startElement(u'feed', self.root_attributes())
        self.add_root_elements(handler)
        self.write_items(handler)
        handler.endElement(u"feed")

    def root_attributes(self):
        """
        Returns the attributes of the <feed> element: the Atom namespace,
        plus xml:lang when the feed has a language.
        """
        attrs = {u"xmlns": self.ns}
        language = self.feed['language']
        if language is not None:
            attrs[u"xml:lang"] = language
        return attrs

    def add_root_elements(self, handler):
        """
        Writes the feed-level elements: title, links, id, updated and the
        optional author, subtitle, category and rights elements.
        """
        feed = self.feed
        add = handler.addQuickElement

        add(u"title", feed['title'])
        add(u"link", "", {u"rel": u"alternate", u"href": feed['link']})
        if feed['feed_url'] is not None:
            add(u"link", "", {u"rel": u"self", u"href": feed['feed_url']})
        add(u"id", feed['id'])
        add(u"updated", rfc3339_date(self.latest_post_date()).decode('utf-8'))

        # The author block is only written when a name is known; the
        # e-mail address and URI are optional parts of it.
        if feed['author_name'] is not None:
            handler.startElement(u"author", {})
            add(u"name", feed['author_name'])
            if feed['author_email'] is not None:
                add(u"email", feed['author_email'])
            if feed['author_link'] is not None:
                add(u"uri", feed['author_link'])
            handler.endElement(u"author")

        if feed['subtitle'] is not None:
            add(u"subtitle", feed['subtitle'])
        for category in feed['categories']:
            add(u"category", "", {u"term": category})
        if feed['feed_copyright'] is not None:
            add(u"rights", feed['feed_copyright'])

    def write_items(self, handler):
        "Writes one <entry> element per feed item."
        for entry in self.items:
            handler.startElement(u"entry", self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement(u"entry")

    def add_item_elements(self, handler, item):
        """
        Writes the child elements of one <entry>: title, link, updated,
        author, id, summary, enclosure link, categories and rights.
        Optional values are skipped when they are not set.
        """
        add = handler.addQuickElement

        add(u"title", item['title'])
        add(u"link", u"", {u"href": item['link'], u"rel": u"alternate"})
        if item['pubdate'] is not None:
            add(u"updated", rfc3339_date(item['pubdate']).decode('utf-8'))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement(u"author", {})
            add(u"name", item['author_name'])
            if item['author_email'] is not None:
                add(u"email", item['author_email'])
            if item['author_link'] is not None:
                add(u"uri", item['author_link'])
            handler.endElement(u"author")

        # Unique ID: fall back to a tag URI built from the link and pubdate.
        unique_id = item['unique_id']
        if unique_id is None:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        add(u"id", unique_id)

        # Summary.
        if item['description'] is not None:
            add(u"summary", item['description'], {u"type": u"html"})

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            enclosure_attrs = {
                u"rel": u"enclosure",
                u"href": enclosure.url,
                u"length": enclosure.length,
                u"type": enclosure.mime_type,
            }
            add(u"link", '', enclosure_attrs)

        # Categories.
        for category in item['categories']:
            add(u"category", u"", {u"term": category})

        # Rights.
        if item['item_copyright'] is not None:
            add(u"rights", item['item_copyright'])
|
370 |
|
# This isolates the decision of what the system default is, so calling code can
# do "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed".
# DefaultFeed is a plain alias: it refers to the Rss201rev2Feed class itself.
DefaultFeed = Rss201rev2Feed