|
0
|
1 |
""" |
|
|
2 |
Syndication feed generation library -- used for generating RSS, etc.

Sample usage:

>>> from django.utils import feedgenerator
>>> feed = feedgenerator.Rss201rev2Feed(
...     title=u"Poynter E-Media Tidbits",
...     link=u"http://www.poynter.org/column.asp?id=31",
...     description=u"A group weblog by the sharpest minds in online media/journalism/publishing.",
...     language=u"en",
... )
>>> feed.add_item(title="Hello", link=u"http://www.holovaty.com/test/", description="Testing.")
>>> fp = open('test.rss', 'w')
>>> feed.write(fp, 'utf-8')
>>> fp.close()

For definitions of the different versions of RSS, see:
http://diveintomark.org/archives/2004/02/04/incompatible-rss
|
|
20 |
""" |
|
|
21 |
|
|
|
22 |
import re |
|
|
23 |
import datetime |
|
|
24 |
from django.utils.xmlutils import SimplerXMLGenerator |
|
|
25 |
from django.utils.encoding import force_unicode, iri_to_uri |
|
|
26 |
|
|
|
27 |
def rfc2822_date(date):
    """
    Formats ``date`` (a datetime.datetime) as an RFC 2822 date-time string,
    e.g. 'Thu, 03 Jan 2008 14:05:09 +0100'.

    A value whose tzinfo reports a UTC offset gets that offset appended as
    +HHMM / -HHMM. A value without tzinfo, or whose tzinfo reports no offset,
    gets '-0000'.
    """
    # We do this ourselves to be timezone aware, email.Utils is not tz aware.
    # Day and month names are looked up by hand instead of through
    # strftime('%a') / strftime('%b'): those follow the process locale, while
    # RFC 2822 only allows the English abbreviations.
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    time_str = '%s, %02d %s %04d %02d:%02d:%02d ' % (
        day_names[date.weekday()], date.day, month_names[date.month - 1],
        date.year, date.hour, date.minute, date.second)

    offset = None
    if date.tzinfo:
        offset = date.tzinfo.utcoffset(date)
    if offset is None:
        return time_str + '-0000'

    # Whole minutes east of UTC. A negative timedelta is stored as negative
    # ``days`` plus non-negative ``seconds``, so both parts are needed.
    total_minutes = (offset.days * 24 * 60) + (offset.seconds // 60)
    # The sign is taken separately: divmod() on a negative total floors the
    # hour, which would turn -03:30 into "-0430".
    if total_minutes < 0:
        sign = '-'
    else:
        sign = '+'
    hour, minute = divmod(abs(total_minutes), 60)
    return time_str + '%s%02d%02d' % (sign, hour, minute)
|
|
37 |
|
|
|
38 |
def rfc3339_date(date):
    """
    Formats ``date`` (a datetime.datetime) as an RFC 3339 timestamp,
    e.g. '2008-01-03T14:05:09+01:00'.

    A value whose tzinfo reports a UTC offset gets that offset appended as
    +HH:MM / -HH:MM. A value without tzinfo, or whose tzinfo reports no
    offset, gets the 'Z' suffix.
    """
    time_str = date.strftime('%Y-%m-%dT%H:%M:%S')

    offset = None
    if date.tzinfo:
        offset = date.tzinfo.utcoffset(date)
    if offset is None:
        return time_str + 'Z'

    # Whole minutes east of UTC. A negative timedelta is stored as negative
    # ``days`` plus non-negative ``seconds``, so both parts are needed.
    total_minutes = (offset.days * 24 * 60) + (offset.seconds // 60)
    # The sign is taken separately: divmod() on a negative total floors the
    # hour, which would turn -03:30 into "-04:30".
    if total_minutes < 0:
        sign = '-'
    else:
        sign = '+'
    hour, minute = divmod(abs(total_minutes), 60)
    return time_str + '%s%02d:%02d' % (sign, hour, minute)
|
|
47 |
|
|
|
48 |
def get_tag_uri(url, date):
    """
    Creates a TagURI. See http://diveintomark.org/archives/2004/05/28/howto-atom-id

    ``url`` is the item's link. ``date`` is an object with a strftime()
    method, or None to leave the date out. The scheme is dropped, the date is
    inserted (as ',YYYY-MM-DD:') in front of the first '/', every '#' becomes
    '/', and the result is prefixed with 'tag:'.
    """
    # https:// is stripped as well as http://. Otherwise the first '/' found
    # below would be one of the scheme's own slashes and the date would end up
    # inside "https://".
    tag = re.sub(r'^https?://', '', url)
    if date is not None:
        # count=1: only the slash that ends the host part receives the date.
        tag = re.sub('/', ',%s:/' % date.strftime('%Y-%m-%d'), tag, 1)
    tag = re.sub('#', '/', tag)
    return u'tag:' + tag
|
|
55 |
|
|
|
56 |
class SyndicationFeed(object):
    "Base class for all syndication feeds. Subclasses should provide write()"
    def __init__(self, title, link, description, language=None, author_email=None,
            author_name=None, author_link=None, subtitle=None, categories=None,
            feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs):
        """
        Stores the feed-level metadata in self.feed and starts with an empty
        item list (self.items).

        Text arguments are passed through force_unicode() and URL arguments
        through iri_to_uri(). The feed 'id' is feed_guid, falling back to
        link when feed_guid is empty. Extra keyword arguments are copied into
        self.feed unchanged.
        """
        # NOTE(review): strings_only=True is presumably what lets None pass
        # through unchanged (the writers test these fields against None) --
        # confirm against force_unicode().
        to_unicode = lambda s: force_unicode(s, strings_only=True)
        if categories:
            categories = [force_unicode(c) for c in categories]
        if ttl is not None:
            # Force ints to unicode: RssFeed hands this value to
            # addQuickElement() as the text of the <ttl> element.
            ttl = force_unicode(ttl)
        self.feed = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'language': to_unicode(language),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'subtitle': to_unicode(subtitle),
            'categories': categories or (),
            'feed_url': iri_to_uri(feed_url),
            'feed_copyright': to_unicode(feed_copyright),
            'id': feed_guid or link,
            'ttl': ttl,
        }
        self.feed.update(kwargs)
        self.items = []

    def add_item(self, title, link, description, author_email=None,
        author_name=None, author_link=None, pubdate=None, comments=None,
        unique_id=None, enclosure=None, categories=(), item_copyright=None,
        ttl=None, **kwargs):
        """
        Adds an item to the feed. All args are expected to be Python Unicode
        objects except pubdate, which is a datetime.datetime object, and
        enclosure, which is an instance of the Enclosure class.

        Extra keyword arguments are copied into the item dictionary unchanged.
        """
        to_unicode = lambda s: force_unicode(s, strings_only=True)
        if categories:
            categories = [to_unicode(c) for c in categories]
        if ttl is not None:
            # Force ints to unicode: Rss201rev2Feed hands this value to
            # addQuickElement() as the text of the item's <ttl> element.
            ttl = force_unicode(ttl)
        item = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'pubdate': pubdate,
            'comments': to_unicode(comments),
            'unique_id': to_unicode(unique_id),
            'enclosure': enclosure,
            'categories': categories or (),
            'item_copyright': to_unicode(item_copyright),
            'ttl': ttl,
        }
        item.update(kwargs)
        self.items.append(item)

    def num_items(self):
        "Returns the number of items added so far."
        return len(self.items)

    def root_attributes(self):
        """
        Return extra attributes to place on the root (i.e. feed/channel) element.
        Called from write().
        """
        return {}

    def add_root_elements(self, handler):
        """
        Add elements in the root (i.e. feed/channel) element. Called
        from write().
        """
        pass

    def item_attributes(self, item):
        """
        Return extra attributes to place on each item (i.e. item/entry) element.
        """
        return {}

    def add_item_elements(self, handler, item):
        """
        Add elements on each item (i.e. item/entry) element.
        """
        pass

    def write(self, outfile, encoding):
        """
        Outputs the feed in the given encoding to outfile, which is a file-like
        object. Subclasses should override this.
        """
        raise NotImplementedError

    def writeString(self, encoding):
        """
        Returns the feed in the given encoding as a string.
        """
        from StringIO import StringIO
        s = StringIO()
        self.write(s, encoding)
        return s.getvalue()

    def latest_post_date(self):
        """
        Returns the latest item's pubdate. If none of them have a pubdate,
        this returns the current date/time.
        """
        updates = [i['pubdate'] for i in self.items if i['pubdate'] is not None]
        if updates:
            updates.sort()
            return updates[-1]
        return datetime.datetime.now()
|
|
168 |
|
|
|
169 |
class Enclosure(object):
    "Represents an RSS enclosure"

    def __init__(self, url, length, mime_type):
        """
        All args are expected to be Python Unicode objects.

        length and mime_type are stored as given; url is stored after being
        passed through iri_to_uri().
        """
        self.length = length
        self.mime_type = mime_type
        self.url = iri_to_uri(url)
|
|
175 |
|
|
|
176 |
class RssFeed(SyndicationFeed):
    mime_type = 'application/rss+xml'

    def write(self, outfile, encoding):
        """
        Writes the whole feed to outfile: an <rss> element wrapping a single
        <channel>, which holds the channel-level elements followed by the
        items.
        """
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u"rss", self.rss_attributes())
        xml.startElement(u"channel", self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        self.endChannelElement(xml)
        xml.endElement(u"rss")

    def rss_attributes(self):
        "Returns the attributes of the <rss> element (the subclass's version)."
        attrs = {}
        attrs[u"version"] = self._version
        return attrs

    def write_items(self, handler):
        "Writes one <item> element per entry in self.items."
        for entry in self.items:
            handler.startElement(u'item', self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement(u"item")

    def add_root_elements(self, handler):
        """
        Writes the channel-level elements. title, link, description and
        lastBuildDate are always written; language, copyright and ttl only
        when they are not None; one <category> per feed category.
        """
        feed = self.feed
        add = handler.addQuickElement

        add(u"title", feed['title'])
        add(u"link", feed['link'])
        add(u"description", feed['description'])

        language = feed['language']
        if language is not None:
            add(u"language", language)

        for category in feed['categories']:
            add(u"category", category)

        rights = feed['feed_copyright']
        if rights is not None:
            add(u"copyright", rights)

        # lastBuildDate is taken from the newest item (see latest_post_date()).
        last_build = rfc2822_date(self.latest_post_date())
        add(u"lastBuildDate", last_build.decode('utf-8'))

        ttl = feed['ttl']
        if ttl is not None:
            add(u"ttl", ttl)

    def endChannelElement(self, handler):
        "Closes the <channel> element opened by write()."
        handler.endElement(u"channel")
|
|
213 |
|
|
|
214 |
class RssUserland091Feed(RssFeed):
    _version = u"0.91"

    def add_item_elements(self, handler, item):
        """
        Writes the children of one <item>: title and link always, description
        only when it is not None.
        """
        add = handler.addQuickElement
        add(u"title", item['title'])
        add(u"link", item['link'])
        description = item['description']
        if description is None:
            return
        add(u"description", description)
|
|
221 |
|
|
|
222 |
class Rss201rev2Feed(RssFeed):
    # Spec: http://blogs.law.harvard.edu/tech/rss
    _version = u"2.0"

    def add_item_elements(self, handler, item):
        """
        Writes the children of one <item>. title and link are always written;
        every other element is written only when the item carries a value
        for it.
        """
        add = handler.addQuickElement

        add(u"title", item['title'])
        add(u"link", item['link'])
        description = item['description']
        if description is not None:
            add(u"description", description)

        # Author information: <author> is used whenever an e-mail address is
        # present; a name on its own goes into <dc:creator> instead.
        name = item["author_name"]
        email = item["author_email"]
        if email:
            if name:
                add(u"author", "%s (%s)" % (email, name))
            else:
                add(u"author", email)
        elif name:
            add(u"dc:creator", name, {"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})

        pubdate = item['pubdate']
        if pubdate is not None:
            add(u"pubDate", rfc2822_date(pubdate).decode('utf-8'))

        # Simple optional text elements, in output order.
        optional = (
            (u"comments", 'comments'),
            (u"guid", 'unique_id'),
            (u"ttl", 'ttl'),
        )
        for tag, key in optional:
            if item[key] is not None:
                add(tag, item[key])

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            enclosure_attrs = {
                u"url": enclosure.url,
                u"length": enclosure.length,
                u"type": enclosure.mime_type,
            }
            add(u"enclosure", '', enclosure_attrs)

        # Categories.
        for category in item['categories']:
            add(u"category", category)
|
|
258 |
|
|
|
259 |
class Atom1Feed(SyndicationFeed):
    # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
    mime_type = 'application/atom+xml'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        """
        Writes the whole feed to outfile: a <feed> element holding the
        feed-level elements followed by one <entry> per item.
        """
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u'feed', self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        xml.endElement(u"feed")

    def root_attributes(self):
        """
        Returns the attributes of the <feed> element: the Atom namespace,
        plus xml:lang when the feed has a language.
        """
        attrs = {u"xmlns": self.ns}
        language = self.feed['language']
        if language is not None:
            attrs[u"xml:lang"] = language
        return attrs

    def _add_author_element(self, handler, source):
        """
        Writes an <author> element from the author_name / author_email /
        author_link keys of source. Nothing is written when author_name is
        None; email and uri are included only when not None.
        """
        if source['author_name'] is None:
            return
        handler.startElement(u"author", {})
        handler.addQuickElement(u"name", source['author_name'])
        if source['author_email'] is not None:
            handler.addQuickElement(u"email", source['author_email'])
        if source['author_link'] is not None:
            handler.addQuickElement(u"uri", source['author_link'])
        handler.endElement(u"author")

    def add_root_elements(self, handler):
        """
        Writes the feed-level elements. title, the alternate link, id and
        updated are always written; the self link, author, subtitle and
        rights only when the feed has them; one <category> per feed category.
        """
        feed = self.feed
        add = handler.addQuickElement

        add(u"title", feed['title'])
        add(u"link", "", {u"rel": u"alternate", u"href": feed['link']})
        feed_url = feed['feed_url']
        if feed_url is not None:
            add(u"link", "", {u"rel": u"self", u"href": feed_url})
        add(u"id", feed['id'])
        # <updated> is taken from the newest item (see latest_post_date()).
        add(u"updated", rfc3339_date(self.latest_post_date()).decode('utf-8'))

        self._add_author_element(handler, feed)

        subtitle = feed['subtitle']
        if subtitle is not None:
            add(u"subtitle", subtitle)
        for category in feed['categories']:
            add(u"category", "", {u"term": category})
        rights = feed['feed_copyright']
        if rights is not None:
            add(u"rights", rights)

    def write_items(self, handler):
        "Writes one <entry> element per entry in self.items."
        for entry in self.items:
            handler.startElement(u"entry", self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement(u"entry")

    def add_item_elements(self, handler, item):
        """
        Writes the children of one <entry>. title, the alternate link and id
        are always written; updated, author, summary, the enclosure link and
        rights only when the item has them; one <category> per item category.
        """
        add = handler.addQuickElement

        add(u"title", item['title'])
        add(u"link", u"", {u"href": item['link'], u"rel": u"alternate"})
        pubdate = item['pubdate']
        if pubdate is not None:
            add(u"updated", rfc3339_date(pubdate).decode('utf-8'))

        # Author information.
        self._add_author_element(handler, item)

        # Unique ID: an explicit unique_id wins, otherwise a tag URI is built
        # from the link and the publication date.
        unique_id = item['unique_id']
        if unique_id is None:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        add(u"id", unique_id)

        # Summary.
        description = item['description']
        if description is not None:
            add(u"summary", description, {u"type": u"html"})

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            enclosure_attrs = {
                u"rel": u"enclosure",
                u"href": enclosure.url,
                u"length": enclosure.length,
                u"type": enclosure.mime_type,
            }
            add(u"link", '', enclosure_attrs)

        # Categories.
        for category in item['categories']:
            add(u"category", u"", {u"term": category})

        # Rights.
        rights = item['item_copyright']
        if rights is not None:
            add(u"rights", rights)
|
|
348 |
|
|
|
349 |
# Alias for the feed class to use when the caller has no preference.
# This isolates the decision of what the system default is, so calling code can
# do "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed".
DefaultFeed = Rss201rev2Feed