web/lib/django/middleware/common.py
author ymh <ymh.work@gmail.com>
Thu, 05 Aug 2010 17:28:09 +0200
changeset 50 012451a812f1
parent 38 77b6da96e6f1
permissions -rw-r--r--
Merge with a2711e44ba5de8b1675d7e0ee6aaa4a6c56a9b46
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
38
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
import re
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
from django.conf import settings
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
from django import http
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
from django.core.mail import mail_managers
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
from django.utils.http import urlquote
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
from django.core import urlresolvers
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
from django.utils.hashcompat import md5_constructor
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
class CommonMiddleware(object):
    """
    "Common" middleware for taking care of some basic operations:

        - Forbids access to User-Agents in settings.DISALLOWED_USER_AGENTS

        - URL rewriting: Based on the APPEND_SLASH and PREPEND_WWW settings,
          this middleware appends missing slashes and/or prepends missing
          "www."s.

            - If APPEND_SLASH is set and the initial URL doesn't end with a
              slash, and it is not found in urlpatterns, a new URL is formed by
              appending a slash at the end. If this new URL is found in
              urlpatterns, then an HTTP-redirect is returned to this new URL;
              otherwise the initial URL is processed as usual.

        - ETags: If the USE_ETAGS setting is set, ETags will be calculated from
          the entire page content and Not Modified responses will be returned
          appropriately.
    """

    def process_request(self, request):
        """
        Check for denied User-Agents and rewrite the URL based on
        settings.APPEND_SLASH and settings.PREPEND_WWW

        Returns an HttpResponseForbidden when the User-Agent matches one of
        settings.DISALLOWED_USER_AGENTS, an HttpResponsePermanentRedirect when
        the host or path had to be rewritten, and None when the request can
        be processed unchanged.

        Raises RuntimeError when settings.DEBUG is on and a POST request
        would need a slash-appending redirect.
        """

        # Check for denied User-Agents
        if 'HTTP_USER_AGENT' in request.META:
            for user_agent_regex in settings.DISALLOWED_USER_AGENTS:
                if user_agent_regex.search(request.META['HTTP_USER_AGENT']):
                    return http.HttpResponseForbidden('<h1>Forbidden</h1>')

        # Check for a redirect based on settings.APPEND_SLASH
        # and settings.PREPEND_WWW
        host = request.get_host()
        # [host, path] pairs; new_url is a copy that is mutated and then
        # compared with old_url to decide whether a redirect is needed.
        old_url = [host, request.path]
        new_url = old_url[:]

        if (settings.PREPEND_WWW and old_url[0] and
                not old_url[0].startswith('www.')):
            new_url[0] = 'www.' + old_url[0]

        # Append a slash if APPEND_SLASH is set and the URL doesn't have a
        # trailing slash and there is no pattern for the current path
        if settings.APPEND_SLASH and (not old_url[1].endswith('/')):
            urlconf = getattr(request, 'urlconf', None)
            if (not _is_valid_path(request.path_info, urlconf) and
                    _is_valid_path("%s/" % request.path_info, urlconf)):
                new_url[1] = new_url[1] + '/'
                if settings.DEBUG and request.method == 'POST':
                    # Call form of raise: valid on both Python 2 and 3.
                    raise RuntimeError((""
                    "You called this URL via POST, but the URL doesn't end "
                    "in a slash and you have APPEND_SLASH set. Django can't "
                    "redirect to the slash URL while maintaining POST data. "
                    "Change your form to point to %s%s (note the trailing "
                    "slash), or set APPEND_SLASH=False in your Django "
                    "settings.") % (new_url[0], new_url[1]))

        if new_url == old_url:
            # No redirects required.
            return
        if new_url[0]:
            # A host is known: build an absolute URL with the request scheme.
            newurl = "%s://%s%s" % (
                request.is_secure() and 'https' or 'http',
                new_url[0], urlquote(new_url[1]))
        else:
            # No host available: redirect to the quoted path only.
            newurl = urlquote(new_url[1])
        if request.GET:
            # Carry the original query string over to the redirect target.
            newurl += '?' + request.META['QUERY_STRING']
        return http.HttpResponsePermanentRedirect(newurl)

    def process_response(self, request, response):
        """
        Send broken link emails for 404s (if enabled) and calculate the
        ETag, if needed.

        Returns the response to send: the given response (possibly with an
        ETag header set) or an HttpResponseNotModified carrying the original
        response's cookies.
        """
        if response.status_code == 404:
            if settings.SEND_BROKEN_LINK_EMAILS:
                # If the referrer was from an internal link or a non-search-engine site,
                # send a note to the managers.
                domain = request.get_host()
                referer = request.META.get('HTTP_REFERER', None)
                is_internal = _is_internal_request(domain, referer)
                path = request.get_full_path()
                if referer and not _is_ignorable_404(path) and (is_internal or '?' not in referer):
                    ua = request.META.get('HTTP_USER_AGENT', '<none>')
                    ip = request.META.get('REMOTE_ADDR', '<none>')
                    mail_managers("Broken %slink on %s" % ((is_internal and 'INTERNAL ' or ''), domain),
                        "Referrer: %s\nRequested URL: %s\nUser agent: %s\nIP address: %s\n" \
                                  % (referer, path, ua, ip))
                # With broken-link emails enabled, a 404 is returned here and
                # skips the ETag handling below.
                return response

        # Use ETags, if requested.
        if settings.USE_ETAGS:
            if response.has_header('ETag'):
                etag = response['ETag']
            else:
                etag = '"%s"' % md5_constructor(response.content).hexdigest()
            if response.status_code >= 200 and response.status_code < 300 and request.META.get('HTTP_IF_NONE_MATCH') == etag:
                # The client already holds this content: answer 304 but keep
                # any cookies that were set on the original response.
                cookies = response.cookies
                response = http.HttpResponseNotModified()
                response.cookies = cookies
            else:
                response['ETag'] = etag

        return response
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
   114
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
   115
def _is_ignorable_404(uri):
    """
    Tells whether a 404 for ``uri`` should be kept quiet, i.e. *not* reported
    to the site managers.

    Returns True when ``uri`` starts with any entry of
    settings.IGNORABLE_404_STARTS or ends with any entry of
    settings.IGNORABLE_404_ENDS, and False otherwise.
    """
    # Prefixes are checked first; suffixes are only consulted when no prefix
    # matched.
    if any(uri.startswith(prefix) for prefix in settings.IGNORABLE_404_STARTS):
        return True
    return any(uri.endswith(suffix) for suffix in settings.IGNORABLE_404_ENDS)
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
   126
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
   127
def _is_internal_request(domain, referer):
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
   128
    """
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
   129
    Returns true if the referring URL is the same domain as the current request.
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
   130
    """
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
   131
    # Different subdomains are treated as different domains.
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
   132
    return referer is not None and re.match("^https?://%s/" % re.escape(domain), referer)
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
   133
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
   134
def _is_valid_path(path, urlconf=None):
    """
    Reports whether ``path`` can be resolved by the URL resolver.

    ``urlconf`` is handed to urlresolvers.resolve() as-is; None is the
    default. Returns True when resolution succeeds and False when it raises
    urlresolvers.Resolver404.

    Wrapping the lookup here lets callers ask "is this a match?" as a plain
    boolean instead of writing their own try...except blocks.
    """
    try:
        urlresolvers.resolve(path, urlconf)
    except urlresolvers.Resolver404:
        return False
    else:
        return True
77b6da96e6f1 update django
ymh <ymh.work@gmail.com>
parents:
diff changeset
   147