|
1 """ |
|
2 Syndication feed generation library -- used for generating RSS, etc. |
|
3 |
|
4 Sample usage: |
|
5 |
|
6 >>> from django.utils import feedgenerator |
|
7 >>> feed = feedgenerator.Rss201rev2Feed( |
|
8 ... title=u"Poynter E-Media Tidbits", |
|
9 ... link=u"http://www.poynter.org/column.asp?id=31", |
|
10 ... description=u"A group weblog by the sharpest minds in online media/journalism/publishing.", |
|
11 ... language=u"en", |
|
12 ... ) |
|
13 >>> feed.add_item(title="Hello", link=u"http://www.holovaty.com/test/", description="Testing.") |
|
14 >>> fp = open('test.rss', 'w') |
|
15 >>> feed.write(fp, 'utf-8') |
|
16 >>> fp.close() |
|
17 |
|
18 For definitions of the different versions of RSS, see: |
|
19 http://diveintomark.org/archives/2004/02/04/incompatible-rss |
|
20 """ |
|
21 |
|
22 import re |
|
23 import datetime |
|
24 from django.utils.xmlutils import SimplerXMLGenerator |
|
25 from django.utils.encoding import force_unicode, iri_to_uri |
|
26 |
|
def rfc2822_date(date):
    """
    Return ``date`` (a datetime.datetime) formatted as an RFC 2822 date-time,
    e.g. 'Fri, 14 Nov 2008 13:37:00 +0100'.

    Aware datetimes get their UTC offset appended as "+HHMM" / "-HHMM".
    Datetimes without a UTC offset get the "-0000" zone.
    """
    # We do this ourselves to be timezone aware, email.Utils is not tz aware.
    # The day and month names are looked up by hand because strftime('%a') and
    # strftime('%b') follow the process locale, while RFC 2822 requires the
    # English abbreviations.
    days = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    time_str = '%s, %02d %s %04d %02d:%02d:%02d ' % (
        days[date.weekday()], date.day, months[date.month - 1], date.year,
        date.hour, date.minute, date.second)

    # utcoffset() is None both for naive datetimes and for a tzinfo that does
    # not know its offset; either way there is no zone to report.
    offset = date.utcoffset()
    if offset is None:
        return time_str + '-0000'

    # Total offset in minutes. timedelta normalises negative values to
    # days=-1 plus positive seconds, so both parts have to be combined.
    total_minutes = (offset.days * 24 * 60) + (offset.seconds // 60)
    if total_minutes < 0:
        sign = '-'
    else:
        sign = '+'
    # Split the magnitude, not the signed value: divmod(-210, 60) is (-4, 30),
    # which would render -03:30 as "-0430".
    hour, minute = divmod(abs(total_minutes), 60)
    return time_str + '%s%02d%02d' % (sign, hour, minute)
|
37 |
|
def rfc3339_date(date):
    """
    Return ``date`` (a datetime.datetime) formatted as an RFC 3339 timestamp,
    e.g. '2008-11-14T13:37:00+01:00'.

    Aware datetimes get their UTC offset appended as "+HH:MM" / "-HH:MM".
    Datetimes without a UTC offset are suffixed with "Z".
    """
    time_str = date.strftime('%Y-%m-%dT%H:%M:%S')

    # utcoffset() is None both for naive datetimes and for a tzinfo that does
    # not know its offset; both are rendered with the "Z" suffix.
    offset = date.utcoffset()
    if offset is None:
        return time_str + 'Z'

    # Total offset in minutes. timedelta normalises negative values to
    # days=-1 plus positive seconds, so both parts have to be combined.
    total_minutes = (offset.days * 24 * 60) + (offset.seconds // 60)
    if total_minutes < 0:
        sign = '-'
    else:
        sign = '+'
    # Split the magnitude, not the signed value: divmod(-210, 60) is (-4, 30),
    # which would render -03:30 as "-04:30".
    hour, minute = divmod(abs(total_minutes), 60)
    return time_str + '%s%02d:%02d' % (sign, hour, minute)
|
47 |
|
def get_tag_uri(url, date):
    """
    Creates a TagURI. See http://diveintomark.org/archives/2004/05/28/howto-atom-id

    ``url`` is the item link. ``date`` is a date/datetime or None. The
    result is u'tag:' followed by the URL with its "http://" or "https://"
    scheme removed, the date (when given) inserted as ",YYYY-MM-DD:" before
    the first slash, and every "#" replaced by "/".
    """
    # Strip either scheme; leaving "https://" in place would make the date be
    # spliced into the middle of the scheme separator below.
    tag = re.sub(r'^https?://', '', url)
    if date is not None:
        # Only the first slash (the end of the host part) receives the date.
        tag = re.sub('/', ',%s:/' % date.strftime('%Y-%m-%d'), tag, 1)
    tag = re.sub('#', '/', tag)
    return u'tag:' + tag
|
55 |
|
class SyndicationFeed(object):
    "Common base for every syndication feed format. Subclasses must supply write()"

    def __init__(self, title, link, description, language=None, author_email=None,
            author_name=None, author_link=None, subtitle=None, categories=None,
            feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs):
        """
        Stores the feed-level metadata in self.feed and starts with an empty
        item list. Text values go through force_unicode(strings_only=True),
        links go through iri_to_uri, and any extra keyword arguments are
        merged into self.feed as given.
        """
        def as_text(value):
            return force_unicode(value, strings_only=True)

        if categories:
            categories = [force_unicode(name) for name in categories]
        feed = {
            'title': as_text(title),
            'link': iri_to_uri(link),
            'description': as_text(description),
            'language': as_text(language),
            'author_email': as_text(author_email),
            'author_name': as_text(author_name),
            'author_link': iri_to_uri(author_link),
            'subtitle': as_text(subtitle),
            'categories': categories or (),
            'feed_url': iri_to_uri(feed_url),
            'feed_copyright': as_text(feed_copyright),
            # The feed id falls back to the link exactly as the caller passed it.
            'id': feed_guid or link,
            'ttl': ttl,
        }
        feed.update(kwargs)
        self.feed = feed
        self.items = []

    def add_item(self, title, link, description, author_email=None,
            author_name=None, author_link=None, pubdate=None, comments=None,
            unique_id=None, enclosure=None, categories=(), item_copyright=None,
            ttl=None, **kwargs):
        """
        Appends one item to the feed. Every argument should be a Python
        Unicode object, apart from pubdate (a datetime.datetime object) and
        enclosure (an instance of the Enclosure class). Extra keyword
        arguments are stored on the item as given.
        """
        def as_text(value):
            return force_unicode(value, strings_only=True)

        if categories:
            categories = [as_text(name) for name in categories]
        entry = {
            'title': as_text(title),
            'link': iri_to_uri(link),
            'description': as_text(description),
            'author_email': as_text(author_email),
            'author_name': as_text(author_name),
            'author_link': iri_to_uri(author_link),
            'pubdate': pubdate,
            'comments': as_text(comments),
            'unique_id': as_text(unique_id),
            'enclosure': enclosure,
            'categories': categories or (),
            'item_copyright': as_text(item_copyright),
            'ttl': ttl,
        }
        entry.update(kwargs)
        self.items.append(entry)

    def num_items(self):
        "Returns how many items have been added so far."
        return len(self.items)

    def root_attributes(self):
        """
        Hook: extra attributes for the root (i.e. feed/channel) element.
        Called from write(). The base implementation supplies none.
        """
        return {}

    def add_root_elements(self, handler):
        """
        Hook: emit the children of the root (i.e. feed/channel) element.
        Called from write(). The base implementation emits nothing.
        """
        pass

    def item_attributes(self, item):
        """
        Hook: extra attributes for each item (i.e. item/entry) element.
        The base implementation supplies none.
        """
        return {}

    def add_item_elements(self, handler, item):
        """
        Hook: emit the children of each item (i.e. item/entry) element.
        The base implementation emits nothing.
        """
        pass

    def write(self, outfile, encoding):
        """
        Writes the feed, in the given encoding, to the file-like object
        outfile. Must be overridden; the base class raises
        NotImplementedError.
        """
        raise NotImplementedError

    def writeString(self, encoding):
        """
        Renders the feed in the given encoding and returns it as a string.
        """
        from StringIO import StringIO
        buf = StringIO()
        self.write(buf, encoding)
        return buf.getvalue()

    def latest_post_date(self):
        """
        Returns the most recent pubdate among the items, or the current
        date/time when no item carries a pubdate.
        """
        dates = []
        for entry in self.items:
            stamp = entry['pubdate']
            if stamp is not None:
                dates.append(stamp)
        if not dates:
            return datetime.datetime.now()
        dates.sort()
        return dates[-1]
|
168 |
|
class Enclosure(object):
    "Holds the url, length and MIME type of one RSS enclosure"

    def __init__(self, url, length, mime_type):
        "Every argument should be a Python Unicode object; url is passed through iri_to_uri"
        self.length = length
        self.mime_type = mime_type
        self.url = iri_to_uri(url)
|
175 |
|
class RssFeed(SyndicationFeed):
    mime_type = 'application/rss+xml'

    def write(self, outfile, encoding):
        """
        Writes the <rss> document to outfile: the channel element, its
        metadata children, then one <item> per feed item.
        """
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u"rss", self.rss_attributes())
        xml.startElement(u"channel", self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        self.endChannelElement(xml)
        xml.endElement(u"rss")

    def rss_attributes(self):
        "Attributes of the <rss> element; self._version is read from the class in use."
        return {u"version": self._version}

    def write_items(self, handler):
        "Emits one <item> element per stored item, filled by add_item_elements()."
        for entry in self.items:
            handler.startElement(u'item', self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement(u"item")

    def add_root_elements(self, handler):
        "Emits the channel metadata; optional values are skipped when None."
        feed = self.feed
        add = handler.addQuickElement

        # Always written, in this order.
        for tag in (u"title", u"link", u"description"):
            add(tag, feed[tag])

        language = feed['language']
        if language is not None:
            add(u"language", language)
        for category in feed['categories']:
            add(u"category", category)
        rights = feed['feed_copyright']
        if rights is not None:
            add(u"copyright", rights)

        built = rfc2822_date(self.latest_post_date())
        add(u"lastBuildDate", built.decode('utf-8'))

        ttl = feed['ttl']
        if ttl is not None:
            add(u"ttl", ttl)

    def endChannelElement(self, handler):
        "Closes the <channel> element opened by write()."
        handler.endElement(u"channel")
|
213 |
|
class RssUserland091Feed(RssFeed):
    _version = u"0.91"

    def add_item_elements(self, handler, item):
        "Emits title and link for the item, plus description when it is not None."
        add = handler.addQuickElement
        add(u"title", item['title'])
        add(u"link", item['link'])
        summary = item['description']
        if summary is not None:
            add(u"description", summary)
|
221 |
|
class Rss201rev2Feed(RssFeed):
    # Spec: http://blogs.law.harvard.edu/tech/rss
    _version = u"2.0"

    def add_item_elements(self, handler, item):
        "Emits the children of one <item>; optional values are skipped when None."
        add = handler.addQuickElement

        add(u"title", item['title'])
        add(u"link", item['link'])
        summary = item['description']
        if summary is not None:
            add(u"description", summary)

        # Author: <author> is written whenever an e-mail address is present;
        # with only a name, <dc:creator> is written instead.
        name = item["author_name"]
        email = item["author_email"]
        if email:
            if name:
                add(u"author", "%s (%s)" % (email, name))
            else:
                add(u"author", email)
        elif name:
            add(u"dc:creator", name, {"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})

        published = item['pubdate']
        if published is not None:
            add(u"pubDate", rfc2822_date(published).decode('utf-8'))

        # Plain optional elements, written in this fixed order.
        for tag, key in ((u"comments", 'comments'),
                         (u"guid", 'unique_id'),
                         (u"ttl", 'ttl')):
            value = item[key]
            if value is not None:
                add(tag, value)

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            add(u"enclosure", '',
                {u"url": enclosure.url, u"length": enclosure.length,
                 u"type": enclosure.mime_type})

        # Categories.
        for category in item['categories']:
            add(u"category", category)
|
258 |
|
class Atom1Feed(SyndicationFeed):
    # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
    mime_type = 'application/atom+xml'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        """
        Writes the <feed> document to outfile: the feed metadata followed by
        one <entry> per item.
        """
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u'feed', self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        xml.endElement(u"feed")

    def root_attributes(self):
        "Attributes of <feed>: the Atom namespace, plus xml:lang when a language is set."
        attrs = {u"xmlns": self.ns}
        language = self.feed['language']
        if language is not None:
            attrs[u"xml:lang"] = language
        return attrs

    def _add_author(self, handler, source):
        """
        Emits an <author> element from the author_name / author_email /
        author_link keys of source (the feed dict or an item dict). Nothing
        is written when author_name is None.
        """
        name = source['author_name']
        if name is None:
            return
        handler.startElement(u"author", {})
        handler.addQuickElement(u"name", name)
        email = source['author_email']
        if email is not None:
            handler.addQuickElement(u"email", email)
        link = source['author_link']
        if link is not None:
            handler.addQuickElement(u"uri", link)
        handler.endElement(u"author")

    def add_root_elements(self, handler):
        "Emits the feed-level metadata; optional values are skipped when None."
        feed = self.feed
        add = handler.addQuickElement

        add(u"title", feed['title'])
        add(u"link", "", {u"rel": u"alternate", u"href": feed['link']})
        self_url = feed['feed_url']
        if self_url is not None:
            add(u"link", "", {u"rel": u"self", u"href": self_url})
        add(u"id", feed['id'])
        add(u"updated", rfc3339_date(self.latest_post_date()).decode('utf-8'))

        self._add_author(handler, feed)

        subtitle = feed['subtitle']
        if subtitle is not None:
            add(u"subtitle", subtitle)
        for category in feed['categories']:
            add(u"category", "", {u"term": category})
        rights = feed['feed_copyright']
        if rights is not None:
            add(u"rights", rights)

    def write_items(self, handler):
        "Emits one <entry> element per stored item, filled by add_item_elements()."
        for entry in self.items:
            handler.startElement(u"entry", self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement(u"entry")

    def add_item_elements(self, handler, item):
        "Emits the children of one <entry>; optional values are skipped when None."
        add = handler.addQuickElement

        add(u"title", item['title'])
        add(u"link", u"", {u"href": item['link'], u"rel": u"alternate"})
        published = item['pubdate']
        if published is not None:
            add(u"updated", rfc3339_date(published).decode('utf-8'))

        # Author information.
        self._add_author(handler, item)

        # Unique ID: an explicit unique_id wins, otherwise a tag URI is
        # derived from the link and pubdate.
        unique_id = item['unique_id']
        if unique_id is None:
            unique_id = get_tag_uri(item['link'], published)
        add(u"id", unique_id)

        # Summary.
        summary = item['description']
        if summary is not None:
            add(u"summary", summary, {u"type": u"html"})

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            add(u"link", '',
                {u"rel": u"enclosure",
                 u"href": enclosure.url,
                 u"length": enclosure.length,
                 u"type": enclosure.mime_type})

        # Categories.
        for category in item['categories']:
            add(u"category", u"", {u"term": category})

        # Rights.
        rights = item['item_copyright']
        if rights is not None:
            add(u"rights", rights)
|
348 |
|
# The system default feed class is chosen here, in one place, so calling code
# can refer to "feedgenerator.DefaultFeed" rather than to
# "feedgenerator.Rss201rev2Feed".
DefaultFeed = Rss201rev2Feed