web/lib/django/http/multipartparser.py
author ymh <ymh.work@gmail.com>
Thu, 05 Aug 2010 17:28:09 +0200
changeset 50 012451a812f1
parent 0 0d40e90630ef
permissions -rw-r--r--
Merge with a2711e44ba5de8b1675d7e0ee6aaa4a6c56a9b46
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
"""
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
Multi-part parsing for file uploads.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
Exposes one class, ``MultiPartParser``, which feeds chunks of uploaded data to
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
file upload handlers for processing.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
"""
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
import cgi
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
from django.conf import settings
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
from django.core.exceptions import SuspiciousOperation
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
from django.utils.datastructures import MultiValueDict
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
from django.utils.encoding import force_unicode
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
from django.utils.text import unescape_entities
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
from django.core.files.uploadhandler import StopUpload, SkipFile, StopFutureHandlers
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
__all__ = ('MultiPartParser', 'MultiPartParserError', 'InputStreamExhausted')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    17
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    18
class MultiPartParserError(Exception):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    19
    pass
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    20
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    21
class InputStreamExhausted(Exception):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    22
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    23
    No more reads are allowed from this device.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    24
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    25
    pass
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    26
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    27
RAW = "raw"
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    28
FILE = "file"
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    29
FIELD = "field"
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    30
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    31
class MultiPartParser(object):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    32
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    33
    A rfc2388 multipart/form-data parser.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    34
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    35
    ``MultiValueDict.parse()`` reads the input stream in ``chunk_size`` chunks
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    36
    and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``. If
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    37
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    38
    def __init__(self, META, input_data, upload_handlers, encoding=None):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    39
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    40
        Initialize the MultiPartParser object.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    41
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    42
        :META:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    43
            The standard ``META`` dictionary in Django request objects.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    44
        :input_data:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    45
            The raw post data, as a file-like object.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    46
        :upload_handler:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    47
            An UploadHandler instance that performs operations on the uploaded
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    48
            data.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    49
        :encoding:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    50
            The encoding with which to treat the incoming data.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    51
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    52
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    53
        #
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    54
        # Content-Type should containt multipart and the boundary information.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    55
        #
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    56
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    57
        content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', ''))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    58
        if not content_type.startswith('multipart/'):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    59
            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    60
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    61
        # Parse the header to get the boundary to split the parts.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    62
        ctypes, opts = parse_header(content_type)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    63
        boundary = opts.get('boundary')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    64
        if not boundary or not cgi.valid_boundary(boundary):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    65
            raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    66
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    67
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    68
        #
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    69
        # Content-Length should contain the length of the body we are about
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    70
        # to receive.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    71
        #
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    72
        try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    73
            content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH',0)))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    74
        except (ValueError, TypeError):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    75
            # For now set it to 0; we'll try again later on down.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    76
            content_length = 0
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    77
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    78
        if content_length <= 0:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    79
            # This means we shouldn't continue...raise an error.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    80
            raise MultiPartParserError("Invalid content length: %r" % content_length)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    81
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    82
        self._boundary = boundary
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    83
        self._input_data = input_data
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    84
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    85
        # For compatibility with low-level network APIs (with 32-bit integers),
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    86
        # the chunk size should be < 2^31, but still divisible by 4.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    87
        possible_sizes = [x.chunk_size for x in upload_handlers if x.chunk_size]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    88
        self._chunk_size = min([2**31-4] + possible_sizes)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    89
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    90
        self._meta = META
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    91
        self._encoding = encoding or settings.DEFAULT_CHARSET
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    92
        self._content_length = content_length
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    93
        self._upload_handlers = upload_handlers
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    94
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    95
    def parse(self):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    96
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    97
        Parse the POST data and break it into a FILES MultiValueDict and a POST
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    98
        MultiValueDict.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    99
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   100
        Returns a tuple containing the POST and FILES dictionary, respectively.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   101
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   102
        # We have to import QueryDict down here to avoid a circular import.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   103
        from django.http import QueryDict
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   104
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   105
        encoding = self._encoding
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   106
        handlers = self._upload_handlers
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   107
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   108
        limited_input_data = LimitBytes(self._input_data, self._content_length)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   109
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   110
        # See if the handler will want to take care of the parsing.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   111
        # This allows overriding everything if somebody wants it.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   112
        for handler in handlers:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   113
            result = handler.handle_raw_input(limited_input_data,
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   114
                                              self._meta,
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   115
                                              self._content_length,
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   116
                                              self._boundary,
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   117
                                              encoding)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   118
            if result is not None:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   119
                return result[0], result[1]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   120
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   121
        # Create the data structures to be used later.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   122
        self._post = QueryDict('', mutable=True)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   123
        self._files = MultiValueDict()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   124
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   125
        # Instantiate the parser and stream:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   126
        stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   127
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   128
        # Whether or not to signal a file-completion at the beginning of the loop.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   129
        old_field_name = None
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   130
        counters = [0] * len(handlers)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   131
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   132
        try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   133
            for item_type, meta_data, field_stream in Parser(stream, self._boundary):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   134
                if old_field_name:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   135
                    # We run this at the beginning of the next loop
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   136
                    # since we cannot be sure a file is complete until
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   137
                    # we hit the next boundary/part of the multipart content.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   138
                    self.handle_file_complete(old_field_name, counters)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   139
                    old_field_name = None
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   140
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   141
                try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   142
                    disposition = meta_data['content-disposition'][1]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   143
                    field_name = disposition['name'].strip()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   144
                except (KeyError, IndexError, AttributeError):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   145
                    continue
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   146
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   147
                transfer_encoding = meta_data.get('content-transfer-encoding')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   148
                field_name = force_unicode(field_name, encoding, errors='replace')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   149
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   150
                if item_type == FIELD:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   151
                    # This is a post field, we can just set it in the post
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   152
                    if transfer_encoding == 'base64':
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   153
                        raw_data = field_stream.read()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   154
                        try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   155
                            data = str(raw_data).decode('base64')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   156
                        except:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   157
                            data = raw_data
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   158
                    else:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   159
                        data = field_stream.read()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   160
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   161
                    self._post.appendlist(field_name,
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   162
                                          force_unicode(data, encoding, errors='replace'))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   163
                elif item_type == FILE:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   164
                    # This is a file, use the handler...
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   165
                    file_name = disposition.get('filename')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   166
                    if not file_name:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   167
                        continue
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   168
                    file_name = force_unicode(file_name, encoding, errors='replace')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   169
                    file_name = self.IE_sanitize(unescape_entities(file_name))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   170
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   171
                    content_type = meta_data.get('content-type', ('',))[0].strip()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   172
                    try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   173
                        charset = meta_data.get('content-type', (0,{}))[1].get('charset', None)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   174
                    except:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   175
                        charset = None
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   176
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   177
                    try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   178
                        content_length = int(meta_data.get('content-length')[0])
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   179
                    except (IndexError, TypeError, ValueError):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   180
                        content_length = None
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   181
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   182
                    counters = [0] * len(handlers)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   183
                    try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   184
                        for handler in handlers:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   185
                            try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   186
                                handler.new_file(field_name, file_name,
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   187
                                                 content_type, content_length,
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   188
                                                 charset)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   189
                            except StopFutureHandlers:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   190
                                break
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   191
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   192
                        for chunk in field_stream:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   193
                            if transfer_encoding == 'base64':
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   194
                                # We only special-case base64 transfer encoding
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   195
                                try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   196
                                    chunk = str(chunk).decode('base64')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   197
                                except Exception, e:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   198
                                    # Since this is only a chunk, any error is an unfixable error.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   199
                                    raise MultiPartParserError("Could not decode base64 data: %r" % e)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   200
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   201
                            for i, handler in enumerate(handlers):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   202
                                chunk_length = len(chunk)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   203
                                chunk = handler.receive_data_chunk(chunk,
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   204
                                                                   counters[i])
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   205
                                counters[i] += chunk_length
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   206
                                if chunk is None:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   207
                                    # If the chunk received by the handler is None, then don't continue.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   208
                                    break
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   209
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   210
                    except SkipFile, e:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   211
                        # Just use up the rest of this file...
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   212
                        exhaust(field_stream)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   213
                    else:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   214
                        # Handle file upload completions on next iteration.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   215
                        old_field_name = field_name
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   216
                else:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   217
                    # If this is neither a FIELD or a FILE, just exhaust the stream.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   218
                    exhaust(stream)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   219
        except StopUpload, e:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   220
            if not e.connection_reset:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   221
                exhaust(limited_input_data)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   222
        else:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   223
            # Make sure that the request data is all fed
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   224
            exhaust(limited_input_data)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   225
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   226
        # Signal that the upload has completed.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   227
        for handler in handlers:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   228
            retval = handler.upload_complete()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   229
            if retval:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   230
                break
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   231
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   232
        return self._post, self._files
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   233
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   234
    def handle_file_complete(self, old_field_name, counters):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   235
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   236
        Handle all the signalling that takes place when a file is complete.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   237
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   238
        for i, handler in enumerate(self._upload_handlers):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   239
            file_obj = handler.file_complete(counters[i])
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   240
            if file_obj:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   241
                # If it returns a file object, then set the files dict.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   242
                self._files.appendlist(force_unicode(old_field_name,
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   243
                                                     self._encoding,
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   244
                                                     errors='replace'),
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   245
                                       file_obj)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   246
                break
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   247
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   248
    def IE_sanitize(self, filename):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   249
        """Cleanup filename from Internet Explorer full paths."""
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   250
        return filename and filename[filename.rfind("\\")+1:].strip()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   251
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   252
class LazyStream(object):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   253
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   254
    The LazyStream wrapper allows one to get and "unget" bytes from a stream.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   255
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   256
    Given a producer object (an iterator that yields bytestrings), the
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   257
    LazyStream object will support iteration, reading, and keeping a "look-back"
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   258
    variable in case you need to "unget" some bytes.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   259
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   260
    def __init__(self, producer, length=None):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   261
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   262
        Every LazyStream must have a producer when instantiated.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   263
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   264
        A producer is an iterable that returns a string each time it
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   265
        is called.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   266
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   267
        self._producer = producer
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   268
        self._empty = False
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   269
        self._leftover = ''
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   270
        self.length = length
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   271
        self.position = 0
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   272
        self._remaining = length
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   273
        self._unget_history = []
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   274
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   275
    def tell(self):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   276
        return self.position
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   277
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   278
    def read(self, size=None):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   279
        def parts():
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   280
            remaining = (size is not None and [size] or [self._remaining])[0]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   281
            # do the whole thing in one shot if no limit was provided.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   282
            if remaining is None:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   283
                yield ''.join(self)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   284
                return
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   285
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   286
            # otherwise do some bookkeeping to return exactly enough
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   287
            # of the stream and stashing any extra content we get from
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   288
            # the producer
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   289
            while remaining != 0:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   290
                assert remaining > 0, 'remaining bytes to read should never go negative'
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   291
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   292
                chunk = self.next()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   293
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   294
                emitting = chunk[:remaining]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   295
                self.unget(chunk[remaining:])
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   296
                remaining -= len(emitting)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   297
                yield emitting
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   298
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   299
        out = ''.join(parts())
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   300
        return out
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   301
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   302
    def next(self):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   303
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   304
        Used when the exact number of bytes to read is unimportant.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   305
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   306
        This procedure just returns whatever is chunk is conveniently returned
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   307
        from the iterator instead. Useful to avoid unnecessary bookkeeping if
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   308
        performance is an issue.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   309
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   310
        if self._leftover:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   311
            output = self._leftover
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   312
            self._leftover = ''
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   313
        else:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   314
            output = self._producer.next()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   315
            self._unget_history = []
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   316
        self.position += len(output)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   317
        return output
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   318
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   319
    def close(self):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   320
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   321
        Used to invalidate/disable this lazy stream.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   322
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   323
        Replaces the producer with an empty list. Any leftover bytes that have
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   324
        already been read will still be reported upon read() and/or next().
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   325
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   326
        self._producer = []
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   327
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   328
    def __iter__(self):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   329
        return self
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   330
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   331
    def unget(self, bytes):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   332
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   333
        Places bytes back onto the front of the lazy stream.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   334
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   335
        Future calls to read() will return those bytes first. The
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   336
        stream position and thus tell() will be rewound.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   337
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   338
        if not bytes:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   339
            return
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   340
        self._update_unget_history(len(bytes))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   341
        self.position -= len(bytes)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   342
        self._leftover = ''.join([bytes, self._leftover])
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   343
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   344
    def _update_unget_history(self, num_bytes):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   345
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   346
        Updates the unget history as a sanity check to see if we've pushed
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   347
        back the same number of bytes in one chunk. If we keep ungetting the
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   348
        same number of bytes many times (here, 50), we're mostly likely in an
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   349
        infinite loop of some sort. This is usually caused by a
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   350
        maliciously-malformed MIME request.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   351
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   352
        self._unget_history = [num_bytes] + self._unget_history[:49]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   353
        number_equal = len([current_number for current_number in self._unget_history
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   354
                            if current_number == num_bytes])
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   355
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   356
        if number_equal > 40:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   357
            raise SuspiciousOperation(
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   358
                "The multipart parser got stuck, which shouldn't happen with"
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   359
                " normal uploaded files. Check for malicious upload activity;"
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   360
                " if there is none, report this to the Django developers."
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   361
            )
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   362
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   363
class ChunkIter(object):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   364
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   365
    An iterable that will yield chunks of data. Given a file-like object as the
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   366
    constructor, this object will yield chunks of read operations from that
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   367
    object.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   368
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   369
    def __init__(self, flo, chunk_size=64 * 1024):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   370
        self.flo = flo
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   371
        self.chunk_size = chunk_size
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   372
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   373
    def next(self):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   374
        try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   375
            data = self.flo.read(self.chunk_size)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   376
        except InputStreamExhausted:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   377
            raise StopIteration()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   378
        if data:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   379
            return data
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   380
        else:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   381
            raise StopIteration()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   382
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   383
    def __iter__(self):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   384
        return self
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   385
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   386
class LimitBytes(object):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   387
    """ Limit bytes for a file object. """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   388
    def __init__(self, fileobject, length):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   389
        self._file = fileobject
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   390
        self.remaining = length
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   391
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   392
    def read(self, num_bytes=None):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   393
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   394
        Read data from the underlying file.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   395
        If you ask for too much or there isn't anything left,
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   396
        this will raise an InputStreamExhausted error.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   397
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   398
        if self.remaining <= 0:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   399
            raise InputStreamExhausted()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   400
        if num_bytes is None:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   401
            num_bytes = self.remaining
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   402
        else:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   403
            num_bytes = min(num_bytes, self.remaining)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   404
        self.remaining -= num_bytes
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   405
        return self._file.read(num_bytes)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   406
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   407
class InterBoundaryIter(object):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   408
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   409
    A Producer that will iterate over boundaries.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   410
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   411
    def __init__(self, stream, boundary):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   412
        self._stream = stream
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   413
        self._boundary = boundary
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   414
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   415
    def __iter__(self):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   416
        return self
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   417
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   418
    def next(self):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   419
        try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   420
            return LazyStream(BoundaryIter(self._stream, self._boundary))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   421
        except InputStreamExhausted:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   422
            raise StopIteration()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   423
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   424
class BoundaryIter(object):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   425
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   426
    A Producer that is sensitive to boundaries.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   427
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   428
    Will happily yield bytes until a boundary is found. Will yield the bytes
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   429
    before the boundary, throw away the boundary bytes themselves, and push the
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   430
    post-boundary bytes back on the stream.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   431
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   432
    The future calls to .next() after locating the boundary will raise a
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   433
    StopIteration exception.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   434
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   435
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   436
    def __init__(self, stream, boundary):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   437
        self._stream = stream
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   438
        self._boundary = boundary
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   439
        self._done = False
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   440
        # rollback an additional six bytes because the format is like
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   441
        # this: CRLF<boundary>[--CRLF]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   442
        self._rollback = len(boundary) + 6
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   443
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   444
        # Try to use mx fast string search if available. Otherwise
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   445
        # use Python find. Wrap the latter for consistency.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   446
        unused_char = self._stream.read(1)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   447
        if not unused_char:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   448
            raise InputStreamExhausted()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   449
        self._stream.unget(unused_char)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   450
        try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   451
            from mx.TextTools import FS
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   452
            self._fs = FS(boundary).find
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   453
        except ImportError:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   454
            self._fs = lambda data: data.find(boundary)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   455
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   456
    def __iter__(self):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   457
        return self
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   458
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   459
    def next(self):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   460
        if self._done:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   461
            raise StopIteration()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   462
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   463
        stream = self._stream
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   464
        rollback = self._rollback
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   465
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   466
        bytes_read = 0
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   467
        chunks = []
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   468
        for bytes in stream:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   469
            bytes_read += len(bytes)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   470
            chunks.append(bytes)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   471
            if bytes_read > rollback:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   472
                break
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   473
            if not bytes:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   474
                break
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   475
        else:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   476
            self._done = True
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   477
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   478
        if not chunks:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   479
            raise StopIteration()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   480
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   481
        chunk = ''.join(chunks)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   482
        boundary = self._find_boundary(chunk, len(chunk) < self._rollback)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   483
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   484
        if boundary:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   485
            end, next = boundary
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   486
            stream.unget(chunk[next:])
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   487
            self._done = True
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   488
            return chunk[:end]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   489
        else:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   490
            # make sure we dont treat a partial boundary (and
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   491
            # its separators) as data
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   492
            if not chunk[:-rollback]:# and len(chunk) >= (len(self._boundary) + 6):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   493
                # There's nothing left, we should just return and mark as done.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   494
                self._done = True
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   495
                return chunk
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   496
            else:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   497
                stream.unget(chunk[-rollback:])
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   498
                return chunk[:-rollback]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   499
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   500
    def _find_boundary(self, data, eof = False):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   501
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   502
        Finds a multipart boundary in data.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   503
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   504
        Should no boundry exist in the data None is returned instead. Otherwise
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   505
        a tuple containing the indices of the following are returned:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   506
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   507
         * the end of current encapsulation
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   508
         * the start of the next encapsulation
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   509
        """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   510
        index = self._fs(data)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   511
        if index < 0:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   512
            return None
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   513
        else:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   514
            end = index
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   515
            next = index + len(self._boundary)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   516
            # backup over CRLF
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   517
            if data[max(0,end-1)] == '\n':
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   518
                end -= 1
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   519
            if data[max(0,end-1)] == '\r':
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   520
                end -= 1
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   521
            return end, next
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   522
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   523
def exhaust(stream_or_iterable):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   524
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   525
    Completely exhausts an iterator or stream.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   526
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   527
    Raise a MultiPartParserError if the argument is not a stream or an iterable.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   528
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   529
    iterator = None
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   530
    try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   531
        iterator = iter(stream_or_iterable)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   532
    except TypeError:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   533
        iterator = ChunkIter(stream_or_iterable, 16384)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   534
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   535
    if iterator is None:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   536
        raise MultiPartParserError('multipartparser.exhaust() was passed a non-iterable or stream parameter')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   537
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   538
    for __ in iterator:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   539
        pass
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   540
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   541
def parse_boundary_stream(stream, max_header_size):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   542
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   543
    Parses one and exactly one stream that encapsulates a boundary.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   544
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   545
    # Stream at beginning of header, look for end of header
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   546
    # and parse it if found. The header must fit within one
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   547
    # chunk.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   548
    chunk = stream.read(max_header_size)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   549
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   550
    # 'find' returns the top of these four bytes, so we'll
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   551
    # need to munch them later to prevent them from polluting
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   552
    # the payload.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   553
    header_end = chunk.find('\r\n\r\n')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   554
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   555
    def _parse_header(line):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   556
        main_value_pair, params = parse_header(line)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   557
        try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   558
            name, value = main_value_pair.split(':', 1)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   559
        except:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   560
            raise ValueError("Invalid header: %r" % line)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   561
        return name, (value, params)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   562
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   563
    if header_end == -1:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   564
        # we find no header, so we just mark this fact and pass on
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   565
        # the stream verbatim
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   566
        stream.unget(chunk)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   567
        return (RAW, {}, stream)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   568
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   569
    header = chunk[:header_end]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   570
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   571
    # here we place any excess chunk back onto the stream, as
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   572
    # well as throwing away the CRLFCRLF bytes from above.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   573
    stream.unget(chunk[header_end + 4:])
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   574
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   575
    TYPE = RAW
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   576
    outdict = {}
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   577
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   578
    # Eliminate blank lines
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   579
    for line in header.split('\r\n'):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   580
        # This terminology ("main value" and "dictionary of
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   581
        # parameters") is from the Python docs.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   582
        try:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   583
            name, (value, params) = _parse_header(line)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   584
        except:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   585
            continue
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   586
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   587
        if name == 'content-disposition':
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   588
            TYPE = FIELD
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   589
            if params.get('filename'):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   590
                TYPE = FILE
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   591
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   592
        outdict[name] = value, params
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   593
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   594
    if TYPE == RAW:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   595
        stream.unget(chunk)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   596
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   597
    return (TYPE, outdict, stream)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   598
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   599
class Parser(object):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   600
    def __init__(self, stream, boundary):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   601
        self._stream = stream
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   602
        self._separator = '--' + boundary
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   603
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   604
    def __iter__(self):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   605
        boundarystream = InterBoundaryIter(self._stream, self._separator)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   606
        for sub_stream in boundarystream:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   607
            # Iterate over each part
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   608
            yield parse_boundary_stream(sub_stream, 1024)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   609
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   610
def parse_header(line):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   611
    """ Parse the header into a key-value. """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   612
    plist = _parse_header_params(';' + line)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   613
    key = plist.pop(0).lower()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   614
    pdict = {}
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   615
    for p in plist:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   616
        i = p.find('=')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   617
        if i >= 0:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   618
            name = p[:i].strip().lower()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   619
            value = p[i+1:].strip()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   620
            if len(value) >= 2 and value[0] == value[-1] == '"':
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   621
                value = value[1:-1]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   622
                value = value.replace('\\\\', '\\').replace('\\"', '"')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   623
            pdict[name] = value
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   624
    return key, pdict
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   625
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   626
def _parse_header_params(s):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   627
    plist = []
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   628
    while s[:1] == ';':
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   629
        s = s[1:]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   630
        end = s.find(';')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   631
        while end > 0 and s.count('"', 0, end) % 2:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   632
            end = s.find(';', end + 1)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   633
        if end < 0:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   634
            end = len(s)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   635
        f = s[:end]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   636
        plist.append(f.strip())
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   637
        s = s[end:]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   638
    return plist