"""
Multi-part parsing for file uploads.

Exposes one class, ``MultiPartParser``, which feeds chunks of uploaded data to
file upload handlers for processing.
"""
import cgi
from django.conf import settings
from django.core.exceptions import SuspiciousOperation
from django.utils.datastructures import MultiValueDict
from django.utils.encoding import force_unicode
from django.utils.text import unescape_entities
from django.core.files.uploadhandler import StopUpload, SkipFile, StopFutureHandlers

__all__ = ('MultiPartParser', 'MultiPartParserError', 'InputStreamExhausted')

class MultiPartParserError(Exception):
    pass

class InputStreamExhausted(Exception):
    """
    No more reads are allowed from this device.
    """
    pass

RAW = "raw"
FILE = "file"
FIELD = "field"

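# A minimal usage sketch (illustrative only; ``request_body`` is assumed to be
# a file-like object holding the raw request body, and ``request.upload_handlers``
# is the handler list Django's request machinery normally supplies):
#
#     parser = MultiPartParser(request.META, request_body, request.upload_handlers)
#     post, files = parser.parse()
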
class MultiPartParser(object):
    """
    An RFC 2388 multipart/form-data parser.

    ``MultiPartParser.parse()`` reads the input stream in ``chunk_size`` chunks
    and returns a tuple of ``(MultiValueDict(POST), MultiValueDict(FILES))``.
    """
    def __init__(self, META, input_data, upload_handlers, encoding=None):
        """
        Initialize the MultiPartParser object.

        :META:
            The standard ``META`` dictionary in Django request objects.
        :input_data:
            The raw post data, as a file-like object.
        :upload_handlers:
            A list of UploadHandler instances that perform operations on the
            uploaded data.
        :encoding:
            The encoding with which to treat the incoming data.
        """

        #
        # Content-Type should contain multipart and the boundary information.
        #

        content_type = META.get('HTTP_CONTENT_TYPE', META.get('CONTENT_TYPE', ''))
        if not content_type.startswith('multipart/'):
            raise MultiPartParserError('Invalid Content-Type: %s' % content_type)

        # Parse the header to get the boundary to split the parts.
        ctypes, opts = parse_header(content_type)
        boundary = opts.get('boundary')
        if not boundary or not cgi.valid_boundary(boundary):
            raise MultiPartParserError('Invalid boundary in multipart: %s' % boundary)


        #
        # Content-Length should contain the length of the body we are about
        # to receive.
        #
        try:
            content_length = int(META.get('HTTP_CONTENT_LENGTH', META.get('CONTENT_LENGTH', 0)))
        except (ValueError, TypeError):
            # For now set it to 0; we'll try again later on down.
            content_length = 0

        if content_length <= 0:
            # This means we shouldn't continue...raise an error.
            raise MultiPartParserError("Invalid content length: %r" % content_length)

        self._boundary = boundary
        self._input_data = input_data

        # For compatibility with low-level network APIs (with 32-bit integers),
        # the chunk size should be < 2^31, but still divisible by 4.
        possible_sizes = [x.chunk_size for x in upload_handlers if x.chunk_size]
        self._chunk_size = min([2**31-4] + possible_sizes)

        self._meta = META
        self._encoding = encoding or settings.DEFAULT_CHARSET
        self._content_length = content_length
        self._upload_handlers = upload_handlers

    def parse(self):
        """
        Parse the POST data and break it into a FILES MultiValueDict and a POST
        MultiValueDict.

        Returns a tuple containing the POST and FILES dictionary, respectively.
        """
        # We have to import QueryDict down here to avoid a circular import.
        from django.http import QueryDict

        encoding = self._encoding
        handlers = self._upload_handlers

        limited_input_data = LimitBytes(self._input_data, self._content_length)

        # See if the handler will want to take care of the parsing.
        # This allows overriding everything if somebody wants it.
        for handler in handlers:
            result = handler.handle_raw_input(limited_input_data,
                                              self._meta,
                                              self._content_length,
                                              self._boundary,
                                              encoding)
            if result is not None:
                return result[0], result[1]

        # Create the data structures to be used later.
        self._post = QueryDict('', mutable=True)
        self._files = MultiValueDict()

        # Instantiate the parser and stream:
        stream = LazyStream(ChunkIter(limited_input_data, self._chunk_size))

        # Whether or not to signal a file-completion at the beginning of the loop.
        old_field_name = None
        counters = [0] * len(handlers)

        try:
            for item_type, meta_data, field_stream in Parser(stream, self._boundary):
                if old_field_name:
                    # We run this at the beginning of the next loop
                    # since we cannot be sure a file is complete until
                    # we hit the next boundary/part of the multipart content.
                    self.handle_file_complete(old_field_name, counters)
                    old_field_name = None

                try:
                    disposition = meta_data['content-disposition'][1]
                    field_name = disposition['name'].strip()
                except (KeyError, IndexError, AttributeError):
                    continue

                transfer_encoding = meta_data.get('content-transfer-encoding')
                field_name = force_unicode(field_name, encoding, errors='replace')

                if item_type == FIELD:
                    # This is a post field, we can just set it in the post
                    if transfer_encoding == 'base64':
                        raw_data = field_stream.read()
                        try:
                            data = str(raw_data).decode('base64')
                        except:
                            data = raw_data
                    else:
                        data = field_stream.read()

                    self._post.appendlist(field_name,
                                          force_unicode(data, encoding, errors='replace'))
                elif item_type == FILE:
                    # This is a file, use the handler...
                    file_name = disposition.get('filename')
                    if not file_name:
                        continue
                    file_name = force_unicode(file_name, encoding, errors='replace')
                    file_name = self.IE_sanitize(unescape_entities(file_name))

                    content_type = meta_data.get('content-type', ('',))[0].strip()
                    try:
                        charset = meta_data.get('content-type', (0, {}))[1].get('charset', None)
                    except:
                        charset = None

                    try:
                        content_length = int(meta_data.get('content-length')[0])
                    except (IndexError, TypeError, ValueError):
                        content_length = None

                    counters = [0] * len(handlers)
                    try:
                        for handler in handlers:
                            try:
                                handler.new_file(field_name, file_name,
                                                 content_type, content_length,
                                                 charset)
                            except StopFutureHandlers:
                                break

                        for chunk in field_stream:
                            if transfer_encoding == 'base64':
                                # We only special-case base64 transfer encoding
                                try:
                                    chunk = str(chunk).decode('base64')
                                except Exception, e:
                                    # Since this is only a chunk, any error is an unfixable error.
                                    raise MultiPartParserError("Could not decode base64 data: %r" % e)

                            for i, handler in enumerate(handlers):
                                chunk_length = len(chunk)
                                chunk = handler.receive_data_chunk(chunk,
                                                                   counters[i])
                                counters[i] += chunk_length
                                if chunk is None:
                                    # If the chunk received by the handler is None, then don't continue.
                                    break

                    except SkipFile, e:
                        # Just use up the rest of this file...
                        exhaust(field_stream)
                    else:
                        # Handle file upload completions on next iteration.
                        old_field_name = field_name
                else:
                    # If this is neither a FIELD nor a FILE, just exhaust the stream.
                    exhaust(stream)
        except StopUpload, e:
            if not e.connection_reset:
                exhaust(limited_input_data)
        else:
            # Make sure that the request data is all fed
            exhaust(limited_input_data)

        # Signal that the upload has completed.
        for handler in handlers:
            retval = handler.upload_complete()
            if retval:
                break

        return self._post, self._files

    def handle_file_complete(self, old_field_name, counters):
        """
        Handle all the signalling that takes place when a file is complete.
        """
        for i, handler in enumerate(self._upload_handlers):
            file_obj = handler.file_complete(counters[i])
            if file_obj:
                # If it returns a file object, then set the files dict.
                self._files.appendlist(force_unicode(old_field_name,
                                                     self._encoding,
                                                     errors='replace'),
                                       file_obj)
                break

    def IE_sanitize(self, filename):
        """Cleanup filename from Internet Explorer full paths."""
        return filename and filename[filename.rfind("\\")+1:].strip()
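
    # Illustrative sketch of IE_sanitize (hypothetical input, not part of the
    # original module): Internet Explorer submits the full client-side path,
    # and only the basename after the last backslash is kept, e.g.
    #     self.IE_sanitize('C:\\Documents\\photo.jpg')  ->  'photo.jpg'
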
class LazyStream(object):
    """
    The LazyStream wrapper allows one to get and "unget" bytes from a stream.

    Given a producer object (an iterator that yields bytestrings), the
    LazyStream object will support iteration, reading, and keeping a "look-back"
    variable in case you need to "unget" some bytes.
    """
    def __init__(self, producer, length=None):
        """
        Every LazyStream must have a producer when instantiated.

        A producer is an iterator that returns a bytestring each time it
        is advanced.
        """
        self._producer = producer
        self._empty = False
        self._leftover = ''
        self.length = length
        self.position = 0
        self._remaining = length
        self._unget_history = []

    def tell(self):
        return self.position

    def read(self, size=None):
        def parts():
            remaining = (size is not None and [size] or [self._remaining])[0]
            # do the whole thing in one shot if no limit was provided.
            if remaining is None:
                yield ''.join(self)
                return

            # otherwise do some bookkeeping to return exactly enough
            # of the stream and stash any extra content we get from
            # the producer
            while remaining != 0:
                assert remaining > 0, 'remaining bytes to read should never go negative'

                chunk = self.next()

                emitting = chunk[:remaining]
                self.unget(chunk[remaining:])
                remaining -= len(emitting)
                yield emitting

        out = ''.join(parts())
        return out

    def next(self):
        """
        Used when the exact number of bytes to read is unimportant.

        This procedure just returns whatever chunk is conveniently returned
        from the iterator. Useful to avoid unnecessary bookkeeping if
        performance is an issue.
        """
        if self._leftover:
            output = self._leftover
            self._leftover = ''
        else:
            output = self._producer.next()
            self._unget_history = []
        self.position += len(output)
        return output

    def close(self):
        """
        Used to invalidate/disable this lazy stream.

        Replaces the producer with an empty list. Any leftover bytes that have
        already been read will still be reported upon read() and/or next().
        """
        self._producer = []

    def __iter__(self):
        return self

    def unget(self, bytes):
        """
        Places bytes back onto the front of the lazy stream.

        Future calls to read() will return those bytes first. The
        stream position and thus tell() will be rewound.
        """
        if not bytes:
            return
        self._update_unget_history(len(bytes))
        self.position -= len(bytes)
        self._leftover = ''.join([bytes, self._leftover])

    def _update_unget_history(self, num_bytes):
        """
        Updates the unget history as a sanity check to see if we've pushed
        back the same number of bytes in one chunk. If we keep ungetting the
        same number of bytes many times (here, 50), we're most likely in an
        infinite loop of some sort. This is usually caused by a
        maliciously-malformed MIME request.
        """
        self._unget_history = [num_bytes] + self._unget_history[:49]
        number_equal = len([current_number for current_number in self._unget_history
                            if current_number == num_bytes])

        if number_equal > 40:
            raise SuspiciousOperation(
                "The multipart parser got stuck, which shouldn't happen with"
                " normal uploaded files. Check for malicious upload activity;"
                " if there is none, report this to the Django developers."
            )

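# A small sketch of the "unget" look-back behaviour described in the LazyStream
# docstring (illustrative only, with made-up data):
#
#     stream = LazyStream(iter(['abcdef']))
#     stream.read(2)       # 'ab'
#     stream.unget('ab')   # push the two bytes back onto the stream
#     stream.read(4)       # 'abcd'
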
class ChunkIter(object):
    """
    An iterable that will yield chunks of data. Given a file-like object as the
    constructor argument, this object will yield chunks of read operations from
    that object.
    """
    def __init__(self, flo, chunk_size=64 * 1024):
        self.flo = flo
        self.chunk_size = chunk_size

    def next(self):
        try:
            data = self.flo.read(self.chunk_size)
        except InputStreamExhausted:
            raise StopIteration()
        if data:
            return data
        else:
            raise StopIteration()

    def __iter__(self):
        return self

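# ChunkIter in isolation (an illustrative sketch; StringIO stands in for the
# limited input stream used above):
#
#     from StringIO import StringIO
#     list(ChunkIter(StringIO('abcde'), 2))   # ['ab', 'cd', 'e']
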
class LimitBytes(object):
    """ Limit bytes for a file object. """
    def __init__(self, fileobject, length):
        self._file = fileobject
        self.remaining = length

    def read(self, num_bytes=None):
        """
        Read data from the underlying file.
        If there isn't anything left to read, this raises an
        InputStreamExhausted error; asking for more than remains
        simply returns what is left.
        """
        if self.remaining <= 0:
            raise InputStreamExhausted()
        if num_bytes is None:
            num_bytes = self.remaining
        else:
            num_bytes = min(num_bytes, self.remaining)
        self.remaining -= num_bytes
        return self._file.read(num_bytes)

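# LimitBytes caps reads at the declared Content-Length (illustrative sketch):
#
#     from StringIO import StringIO
#     limited = LimitBytes(StringIO('abcdef'), 4)
#     limited.read()    # 'abcd'
#     limited.read()    # raises InputStreamExhausted
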
class InterBoundaryIter(object):
    """
    A Producer that will iterate over boundaries.
    """
    def __init__(self, stream, boundary):
        self._stream = stream
        self._boundary = boundary

    def __iter__(self):
        return self

    def next(self):
        try:
            return LazyStream(BoundaryIter(self._stream, self._boundary))
        except InputStreamExhausted:
            raise StopIteration()

class BoundaryIter(object):
    """
    A Producer that is sensitive to boundaries.

    Will happily yield bytes until a boundary is found. Will yield the bytes
    before the boundary, throw away the boundary bytes themselves, and push the
    post-boundary bytes back on the stream.

    Future calls to .next() after locating the boundary will raise a
    StopIteration exception.
    """

    def __init__(self, stream, boundary):
        self._stream = stream
        self._boundary = boundary
        self._done = False
        # rollback an additional six bytes because the format is like
        # this: CRLF<boundary>[--CRLF]
        self._rollback = len(boundary) + 6

        # Try to use mx fast string search if available. Otherwise
        # use Python find. Wrap the latter for consistency.
        unused_char = self._stream.read(1)
        if not unused_char:
            raise InputStreamExhausted()
        self._stream.unget(unused_char)
        try:
            from mx.TextTools import FS
            self._fs = FS(boundary).find
        except ImportError:
            self._fs = lambda data: data.find(boundary)

    def __iter__(self):
        return self

    def next(self):
        if self._done:
            raise StopIteration()

        stream = self._stream
        rollback = self._rollback

        bytes_read = 0
        chunks = []
        for bytes in stream:
            bytes_read += len(bytes)
            chunks.append(bytes)
            if bytes_read > rollback:
                break
            if not bytes:
                break
        else:
            self._done = True

        if not chunks:
            raise StopIteration()

        chunk = ''.join(chunks)
        boundary = self._find_boundary(chunk, len(chunk) < self._rollback)

        if boundary:
            end, next = boundary
            stream.unget(chunk[next:])
            self._done = True
            return chunk[:end]
        else:
            # make sure we don't treat a partial boundary (and
            # its separators) as data
            if not chunk[:-rollback]:  # and len(chunk) >= (len(self._boundary) + 6):
                # There's nothing left, we should just return and mark as done.
                self._done = True
                return chunk
            else:
                stream.unget(chunk[-rollback:])
                return chunk[:-rollback]

    def _find_boundary(self, data, eof=False):
        """
        Finds a multipart boundary in data.

        Should no boundary exist in the data, None is returned. Otherwise,
        a tuple containing the indices of the following is returned:

         * the end of current encapsulation
         * the start of the next encapsulation
        """
        index = self._fs(data)
        if index < 0:
            return None
        else:
            end = index
            next = index + len(self._boundary)
            # backup over CRLF
            if data[max(0, end-1)] == '\n':
                end -= 1
            if data[max(0, end-1)] == '\r':
                end -= 1
            return end, next

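# Sketch of _find_boundary on made-up data with boundary 'XYZ' (illustrative
# only): for data = 'hello\r\nXYZtail' it returns (5, 10); the encapsulation
# ends just before the CRLF at index 5 and the next one starts right after
# the boundary at index 10.
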
def exhaust(stream_or_iterable):
    """
    Completely exhausts an iterator or stream.

    Raise a MultiPartParserError if the argument is not a stream or an iterable.
    """
    iterator = None
    try:
        iterator = iter(stream_or_iterable)
    except TypeError:
        iterator = ChunkIter(stream_or_iterable, 16384)

    if iterator is None:
        raise MultiPartParserError('multipartparser.exhaust() was passed a non-iterable or stream parameter')

    for __ in iterator:
        pass

def parse_boundary_stream(stream, max_header_size):
    """
    Parses one and exactly one stream that encapsulates a boundary.
    """
    # Stream at beginning of header, look for end of header
    # and parse it if found. The header must fit within one
    # chunk.
    chunk = stream.read(max_header_size)

    # 'find' returns the start of these four bytes, so we'll
    # need to munch them later to prevent them from polluting
    # the payload.
    header_end = chunk.find('\r\n\r\n')

    def _parse_header(line):
        main_value_pair, params = parse_header(line)
        try:
            name, value = main_value_pair.split(':', 1)
        except:
            raise ValueError("Invalid header: %r" % line)
        return name, (value, params)

    if header_end == -1:
        # we find no header, so we just mark this fact and pass on
        # the stream verbatim
        stream.unget(chunk)
        return (RAW, {}, stream)

    header = chunk[:header_end]

    # here we place any excess chunk back onto the stream, as
    # well as throwing away the CRLFCRLF bytes from above.
    stream.unget(chunk[header_end + 4:])

    TYPE = RAW
    outdict = {}

    # Eliminate blank lines
    for line in header.split('\r\n'):
        # This terminology ("main value" and "dictionary of
        # parameters") is from the Python docs.
        try:
            name, (value, params) = _parse_header(line)
        except:
            continue

        if name == 'content-disposition':
            TYPE = FIELD
            if params.get('filename'):
                TYPE = FILE

        outdict[name] = value, params

    if TYPE == RAW:
        stream.unget(chunk)

    return (TYPE, outdict, stream)

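# The outdict returned above maps lowercased header names to (value, params)
# pairs, e.g. (illustrative; note the value keeps the space that followed the
# colon and is lowercased as well):
#     {'content-disposition': (' form-data', {'name': 'avatar'})}
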
class Parser(object):
    def __init__(self, stream, boundary):
        self._stream = stream
        self._separator = '--' + boundary

    def __iter__(self):
        boundarystream = InterBoundaryIter(self._stream, self._separator)
        for sub_stream in boundarystream:
            # Iterate over each part
            yield parse_boundary_stream(sub_stream, 1024)

def parse_header(line):
    """ Parse the header into a key-value pair. """
    plist = _parse_header_params(';' + line)
    key = plist.pop(0).lower()
    pdict = {}
    for p in plist:
        i = p.find('=')
        if i >= 0:
            name = p[:i].strip().lower()
            value = p[i+1:].strip()
            if len(value) >= 2 and value[0] == value[-1] == '"':
                value = value[1:-1]
                value = value.replace('\\\\', '\\').replace('\\"', '"')
            pdict[name] = value
    return key, pdict

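# For example (an illustrative call, not from the original source), a typical
# multipart header value such as
#
#     parse_header('form-data; name="avatar"; filename="me.png"')
#
# returns ('form-data', {'name': 'avatar', 'filename': 'me.png'}).
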
def _parse_header_params(s):
    plist = []
    while s[:1] == ';':
        s = s[1:]
        end = s.find(';')
        while end > 0 and s.count('"', 0, end) % 2:
            end = s.find(';', end + 1)
        if end < 0:
            end = len(s)
        f = s[:end]
        plist.append(f.strip())
        s = s[end:]
    return plist