|
0
|
1 |
""" |
|
|
2 |
This module contains helper functions for controlling caching. It does so by |
|
|
3 |
managing the "Vary" header of responses. It includes functions to patch the |
|
|
4 |
header of response objects directly and decorators that change functions to do |
|
|
5 |
that header-patching themselves. |
|
|
6 |
|
|
|
7 |
For information on the Vary header, see: |
|
|
8 |
|
|
|
9 |
http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.44 |
|
|
10 |
|
|
|
11 |
Essentially, the "Vary" HTTP header defines which headers a cache should take |
|
|
12 |
into account when building its cache key. Requests with the same path but |
|
|
13 |
different header content for headers named in "Vary" need to get different |
|
|
14 |
cache keys to prevent delivery of wrong content. |
|
|
15 |
|
|
|
16 |
An example: i18n middleware would need to distinguish caches by the |
|
|
17 |
"Accept-language" header. |
|
|
18 |
""" |
|
|
19 |
|
|
|
20 |
import re |
|
|
21 |
import time |
|
|
22 |
try: |
|
|
23 |
set |
|
|
24 |
except NameError: |
|
|
25 |
from sets import Set as set # Python 2.3 fallback |
|
|
26 |
|
|
|
27 |
from django.conf import settings |
|
|
28 |
from django.core.cache import cache |
|
|
29 |
from django.utils.encoding import smart_str, iri_to_uri |
|
|
30 |
from django.utils.http import http_date |
|
|
31 |
from django.utils.hashcompat import md5_constructor |
|
|
32 |
from django.http import HttpRequest |
|
|
33 |
|
|
|
34 |
# Splits a comma-separated header value into its elements, swallowing any
# whitespace around each comma.
cc_delim_re = re.compile(r'\s*,\s*')
|
|
35 |
|
|
|
36 |
def patch_cache_control(response, **kwargs):
    """
    Patches the Cache-Control header of ``response`` by merging all keyword
    arguments into it. The transformation is as follows:

    * Directive names already present in the header are lowercased when the
      header is parsed.
    * Underscores in keyword parameter names are converted to hyphens.
    * If the value of a parameter is True (exactly True, not just a
      true value), only the parameter name is added to the header.
    * All other parameters are added with their value, after converting it
      with smart_str().
    * If the header already carries a numeric max-age and a new ``max_age``
      is given, the smaller of the two is used.

    The response is modified in place; nothing is returned.
    """
    def dictitem(s):
        # Parse one "name[=value]" directive; a bare directive maps to True.
        t = s.split('=', 1)
        if len(t) > 1:
            return (t[0].lower(), t[1])
        return (t[0].lower(), True)

    def dictvalue(t):
        # Render one (name, value) pair back into header syntax.
        if t[1] is True:
            return t[0]
        return t[0] + '=' + smart_str(t[1])

    if response.has_header('Cache-Control'):
        # Skip empty elements so an empty header value does not turn into a
        # directive with an empty name.
        cc = dict([dictitem(el)
                   for el in cc_delim_re.split(response['Cache-Control'])
                   if el])
    else:
        cc = {}

    # If there's already a max-age header but we're being asked to set a new
    # max-age, use the minimum of the two ages. In practice this happens when
    # a decorator and a piece of middleware both operate on a given view.
    if 'max_age' in kwargs:
        current = cc.get('max-age')
        # The parsed value is a string (or True for a bare "max-age"), so it
        # must be converted before it can be compared numerically with the
        # requested age. int(True) would silently yield 1, hence the guard.
        if current is not None and current is not True:
            try:
                current = int(current)
            except (ValueError, TypeError):
                # Unparseable existing value: the new max_age replaces it.
                pass
            else:
                kwargs['max_age'] = min(current, kwargs['max_age'])

    for (k, v) in kwargs.items():
        cc[k.replace('_', '-')] = v
    response['Cache-Control'] = ', '.join([dictvalue(el) for el in cc.items()])
|
|
77 |
|
|
|
78 |
def get_max_age(response):
    """
    Returns the max-age from the response Cache-Control header as an integer
    (or ``None`` if it wasn't found or wasn't an integer).
    """
    if not response.has_header('Cache-Control'):
        return None
    cc = dict([_to_tuple(el) for el in
               cc_delim_re.split(response['Cache-Control'])])
    max_age = cc.get('max-age')
    # A bare "max-age" directive (no "=value") is parsed to True, and
    # int(True) == 1, so it has to be rejected explicitly rather than left
    # to the exception handler below.
    if max_age is None or max_age is True:
        return None
    try:
        return int(max_age)
    except (ValueError, TypeError):
        return None
|
|
92 |
|
|
|
93 |
def patch_response_headers(response, cache_timeout=None):
    """
    Fills in caching-related headers on the given HttpResponse object:
    ETag, Last-Modified, Expires and Cache-Control.

    ETag, Last-Modified and Expires are left alone when the response already
    has them; Cache-Control is always passed through patch_cache_control()
    with the effective max-age.

    cache_timeout is in seconds and falls back to the
    CACHE_MIDDLEWARE_SECONDS setting when omitted. Negative values are
    treated as 0.
    """
    if cache_timeout is None:
        cache_timeout = settings.CACHE_MIDDLEWARE_SECONDS
    # max-age can't be negative, so clamp at zero.
    cache_timeout = max(cache_timeout, 0)

    # (header name, lazily evaluated default); values are only computed for
    # headers that are actually missing, in this order.
    header_defaults = (
        ('ETag',
         lambda: '"%s"' % md5_constructor(response.content).hexdigest()),
        ('Last-Modified', http_date),
        ('Expires', lambda: http_date(time.time() + cache_timeout)),
    )
    for header_name, make_value in header_defaults:
        if not response.has_header(header_name):
            response[header_name] = make_value()

    patch_cache_control(response, max_age=cache_timeout)
|
|
114 |
|
|
|
115 |
def add_never_cache_headers(response):
    """
    Marks the response as something that should never be cached, by running
    patch_response_headers() on it with a negative cache timeout.
    """
    already_expired = -1
    patch_response_headers(response, cache_timeout=already_expired)
|
|
120 |
|
|
|
121 |
def patch_vary_headers(response, newheaders):
    """
    Adds (or updates) the "Vary" header in the given HttpResponse object.
    newheaders is a list of header names that should be in "Vary". Existing
    headers in "Vary" aren't removed.

    Header names are compared case-insensitively, so a name is only appended
    if neither the existing header nor an earlier entry of newheaders
    already contains it. The response is modified in place.
    """
    # Note that we need to keep the original order intact, because cache
    # implementations may rely on the order of the Vary contents in, say,
    # computing an MD5 hash.
    if response.has_header('Vary'):
        # Strip the value and drop empty elements so that an empty or padded
        # header doesn't produce blank entries (and a leading ", ").
        vary_headers = [header for header in
                        cc_delim_re.split(response['Vary'].strip())
                        if header]
    else:
        vary_headers = []
    # Use .lower() here so we treat headers as case-insensitive.
    seen = set([header.lower() for header in vary_headers])
    for newheader in newheaders:
        folded = newheader.lower()
        if folded not in seen:
            # Record it immediately so duplicates inside newheaders itself
            # are only appended once.
            seen.add(folded)
            vary_headers.append(newheader)
    response['Vary'] = ', '.join(vary_headers)
|
|
139 |
|
|
|
140 |
def _generate_cache_key(request, headerlist, key_prefix):
    """
    Builds the page cache key for a request.

    The key combines key_prefix, a digest of the request path and a digest
    of the request.META values for the names in headerlist (names missing
    from request.META are skipped).
    """
    header_digest = md5_constructor()
    meta = request.META
    for meta_name in headerlist:
        header_value = meta.get(meta_name, None)
        if header_value is None:
            continue
        header_digest.update(header_value)
    path_digest = md5_constructor(iri_to_uri(request.path))
    key_parts = (key_prefix, path_digest.hexdigest(),
                 header_digest.hexdigest())
    return 'views.decorators.cache.cache_page.%s.%s.%s' % key_parts
|
|
150 |
|
|
|
151 |
def _generate_cache_header_key(key_prefix, request):
    """
    Builds the header cache key for a request: key_prefix plus a digest of
    the request path.
    """
    path_digest = md5_constructor(iri_to_uri(request.path)).hexdigest()
    key_parts = (key_prefix, path_digest)
    return 'views.decorators.cache.cache_header.%s.%s' % key_parts
|
|
155 |
|
|
|
156 |
def get_cache_key(request, key_prefix=None):
    """
    Looks up the cache key for a request, based on the request path.

    This is usable in the request phase: the list of headers to take into
    account is fetched from the cache (where learn_cache_key() stored it)
    and combined with the request to build the key to check against.

    key_prefix defaults to the CACHE_MIDDLEWARE_KEY_PREFIX setting.

    Returns None when no header list is stored for the path, which means
    the page needs to be rebuilt.
    """
    if key_prefix is None:
        key_prefix = settings.CACHE_MIDDLEWARE_KEY_PREFIX
    header_key = _generate_cache_header_key(key_prefix, request)
    learned_headers = cache.get(header_key, None)
    if learned_headers is None:
        # Nothing learned for this path yet.
        return None
    return _generate_cache_key(request, learned_headers, key_prefix)
|
|
174 |
|
|
|
175 |
def learn_cache_key(request, response, cache_timeout=None, key_prefix=None):
    """
    Records which request headers matter for a request path and returns the
    resulting page cache key.

    The header names are read from the response's Vary header, converted to
    their request.META form ('HTTP_' prefix, upper case, hyphens replaced by
    underscores) and stored in the cache under the path's header key, so
    that get_cache_key() can later build the same key without generating
    the response. A response without a Vary header stores an empty list.

    The list is stored in the same cache as the pages themselves, for
    cache_timeout seconds. If it gets aged out, the response simply has to
    be built once more to learn the headers again.

    cache_timeout defaults to the CACHE_MIDDLEWARE_SECONDS setting and
    key_prefix to the CACHE_MIDDLEWARE_KEY_PREFIX setting.
    """
    if key_prefix is None:
        key_prefix = settings.CACHE_MIDDLEWARE_KEY_PREFIX
    if cache_timeout is None:
        cache_timeout = settings.CACHE_MIDDLEWARE_SECONDS
    header_key = _generate_cache_header_key(key_prefix, request)

    # Without a Vary header we still need a cache key for the request path,
    # so the list simply stays empty.
    meta_names = []
    if response.has_header('Vary'):
        for vary_name in cc_delim_re.split(response['Vary']):
            meta_names.append('HTTP_' + vary_name.upper().replace('-', '_'))

    cache.set(header_key, meta_names, cache_timeout)
    return _generate_cache_key(request, meta_names, key_prefix)
|
|
203 |
|
|
|
204 |
|
|
|
205 |
def _to_tuple(s):
    """
    Splits a single "name=value" header element into a (name, value) pair,
    lowercasing the name. An element without "=" yields (name, True).
    Only the first "=" separates name from value.
    """
    if '=' not in s:
        return s.lower(), True
    name, value = s.split('=', 1)
    return name.lower(), value