diff -r ebaad720f88b -r 526ebd3988b0 web/lib/django_extensions/management/commands/sync_media_s3.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/lib/django_extensions/management/commands/sync_media_s3.py	Wed Jan 20 12:37:40 2010 +0100
@@ -0,0 +1,265 @@
+"""
+Sync Media to S3
+================
+
+Django command that scans all files in your settings.MEDIA_ROOT folder and
+uploads them to S3 with the same directory structure.
+
+Optionally, this command can also do the following (both off by default):
+* gzip-compress any CSS and JavaScript files it finds and add the
+  appropriate 'Content-Encoding' header.
+* set a far-future 'Expires' header for optimal caching.
+
+Note: This script requires the Python boto library and valid Amazon Web
+Services API keys.
+
+Required settings.py variables:
+AWS_ACCESS_KEY_ID = ''
+AWS_SECRET_ACCESS_KEY = ''
+AWS_BUCKET_NAME = ''
+
+Command options are:
+  -p PREFIX, --prefix=PREFIX
+                        The prefix to prepend to the path on S3.
+  -d DIR, --dir=DIR     The root directory to use instead of your MEDIA_ROOT.
+  --gzip                Enables gzipping CSS and JavaScript files.
+  --expires             Enables setting a far-future 'Expires' header.
+  --force               Skip the file mtime check to force upload of all
+                        files.
+  --filter-list         Override the default directory and file exclusion
+                        filters. (enter as a comma-separated list)
+
+TODO:
+ * Use fnmatch (or regex) to allow more complex FILTER_LIST rules.
+
+"""
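+
+# Example invocations (the prefix and filter values are illustrative):
+#
+#     python manage.py sync_media_s3
+#     python manage.py sync_media_s3 --gzip --expires
+#     python manage.py sync_media_s3 -p media --filter-list=.DS_Store,.svn
+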
+import datetime
+import email
+import mimetypes
+import optparse
+import os
+import time
+
+from django.conf import settings
+from django.core.management.base import BaseCommand, CommandError
+
+# Make sure boto is available
+try:
+    import boto
+    import boto.exception
+except ImportError:
+    raise ImportError, "The boto Python library is not installed."
+
+class Command(BaseCommand):
+
+    # Extra variables to avoid passing these around
+    AWS_ACCESS_KEY_ID = ''
+    AWS_SECRET_ACCESS_KEY = ''
+    AWS_BUCKET_NAME = ''
+    DIRECTORY = ''
+    FILTER_LIST = ['.DS_Store', '.svn', '.hg', '.git', 'Thumbs.db']
+    GZIP_CONTENT_TYPES = (
+        'text/css',
+        'application/javascript',
+        'application/x-javascript'
+    )
+
+    upload_count = 0
+    skip_count = 0
+
+    option_list = BaseCommand.option_list + (
+        optparse.make_option('-p', '--prefix',
+            dest='prefix', default='',
+            help="The prefix to prepend to the path on S3."),
+        optparse.make_option('-d', '--dir',
+            dest='dir', default=settings.MEDIA_ROOT,
+            help="The root directory to use instead of your MEDIA_ROOT"),
+        optparse.make_option('--gzip',
+            action='store_true', dest='gzip', default=False,
+            help="Enables gzipping CSS and JavaScript files."),
+        optparse.make_option('--expires',
+            action='store_true', dest='expires', default=False,
+            help="Enables setting a far-future 'Expires' header."),
+        optparse.make_option('--force',
+            action='store_true', dest='force', default=False,
+            help="Skip the file mtime check to force upload of all files."),
+        optparse.make_option('--filter-list', dest='filter_list',
+            action='store', default='',
+            help="Override the default directory and file exclusion filters. (enter as a comma-separated list)"),
+    )
+
+    help = 'Syncs the complete MEDIA_ROOT structure and files to the S3 bucket named in settings.AWS_BUCKET_NAME.'
+
+    can_import_settings = True
+
+    def handle(self, *args, **options):
+
+        # Check for AWS keys in settings
+        if not hasattr(settings, 'AWS_ACCESS_KEY_ID') or \
+           not hasattr(settings, 'AWS_SECRET_ACCESS_KEY'):
+            raise CommandError('Missing AWS keys from settings file. Please '
+                'supply both AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.')
+        self.AWS_ACCESS_KEY_ID = settings.AWS_ACCESS_KEY_ID
+        self.AWS_SECRET_ACCESS_KEY = settings.AWS_SECRET_ACCESS_KEY
+
+        if not hasattr(settings, 'AWS_BUCKET_NAME'):
+            raise CommandError('Missing bucket name from settings file. Please '
+                'add AWS_BUCKET_NAME to your settings file.')
+        if not settings.AWS_BUCKET_NAME:
+            raise CommandError('AWS_BUCKET_NAME cannot be empty.')
+        self.AWS_BUCKET_NAME = settings.AWS_BUCKET_NAME
+
+        if not getattr(settings, 'MEDIA_ROOT', None):
+            raise CommandError('MEDIA_ROOT must be set in your settings.')
+
+        self.verbosity = int(options.get('verbosity'))
+        self.prefix = options.get('prefix')
+        self.do_gzip = options.get('gzip')
+        self.do_expires = options.get('expires')
+        self.do_force = options.get('force')
+        self.DIRECTORY = options.get('dir')
+        self.FILTER_LIST = getattr(settings, 'FILTER_LIST', self.FILTER_LIST)
+        filter_list = options.get('filter_list')
+        if filter_list:
+            # the command line option overrides both the default FILTER_LIST
+            # and settings.FILTER_LIST
+            self.FILTER_LIST = filter_list.split(',')
+
+        # Now call the syncing method to walk the MEDIA_ROOT directory and
+        # upload all files found.
+        self.sync_s3()
+
+        print
+        print "%d files uploaded." % self.upload_count
+        print "%d files skipped." % self.skip_count
+
+    def sync_s3(self):
+        """
+        Walks the media directory and syncs files to S3
+        """
+        bucket, key = self.open_s3()
+        os.path.walk(self.DIRECTORY, self.upload_s3,
+            (bucket, key, self.AWS_BUCKET_NAME, self.DIRECTORY))
+
+    def compress_string(self, s):
+        """Gzip a given string."""
+        import cStringIO, gzip
+        zbuf = cStringIO.StringIO()
+        zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
+        zfile.write(s)
+        zfile.close()
+        return zbuf.getvalue()
+
+    def open_s3(self):
+        """
+        Opens a connection to S3, returning the bucket and a reusable key
+        """
+        conn = boto.connect_s3(self.AWS_ACCESS_KEY_ID, self.AWS_SECRET_ACCESS_KEY)
+        try:
+            bucket = conn.get_bucket(self.AWS_BUCKET_NAME)
+        except boto.exception.S3ResponseError:
+            # The bucket doesn't exist yet, so create it
+            bucket = conn.create_bucket(self.AWS_BUCKET_NAME)
+        return bucket, boto.s3.key.Key(bucket)
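+
+    # Note: os.path.walk (removed in Python 3 in favour of os.walk) calls
+    # its visitor as visit(arg, dirname, names). For an illustrative
+    # MEDIA_ROOT of '/srv/media', one callback invocation might look like:
+    #
+    #     self.upload_s3((bucket, key, 'my-bucket', '/srv/media'),
+    #                    '/srv/media/css', ['site.css', 'print.css'])
+    #
+    # Deleting entries from `names` in place prunes the rest of the walk.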
+    def upload_s3(self, arg, dirname, names):
+        """
+        This is the callback to os.path.walk and where much of the work happens
+        """
+        bucket, key, bucket_name, root_dir = arg  # expand arg tuple
+
+        # Skip directories we don't want to sync
+        if os.path.basename(dirname) in self.FILTER_LIST:
+            # prevent walk from processing subfiles/subdirs below the ignored one
+            del names[:]
+            return
+
+        # Later we assume the MEDIA_ROOT ends with a trailing slash
+        if not root_dir.endswith(os.path.sep):
+            root_dir = root_dir + os.path.sep
+
+        for file in names:
+            headers = {}
+
+            if file in self.FILTER_LIST:
+                continue  # Skip files we don't want to sync
+
+            filename = os.path.join(dirname, file)
+            if os.path.isdir(filename):
+                continue  # Don't try to upload directories
+
+            file_key = filename[len(root_dir):]
+            if self.prefix:
+                file_key = '%s/%s' % (self.prefix, file_key)
+
+            # Upload only if the local file is newer than the copy on S3
+            if not self.do_force:
+                s3_key = bucket.get_key(file_key)
+                if s3_key:
+                    s3_datetime = datetime.datetime(*time.strptime(
+                        s3_key.last_modified, '%a, %d %b %Y %H:%M:%S %Z')[0:6])
+                    local_datetime = datetime.datetime.utcfromtimestamp(
+                        os.stat(filename).st_mtime)
+                    if local_datetime < s3_datetime:
+                        self.skip_count += 1
+                        if self.verbosity > 1:
+                            print "File %s hasn't been modified since it " \
+                                "was last uploaded" % file_key
+                        continue
+
+            # File is newer, let's process and upload
+            if self.verbosity > 0:
+                print "Uploading %s..." % file_key
+
+            content_type = mimetypes.guess_type(filename)[0]
+            if content_type:
+                headers['Content-Type'] = content_type
+            file_obj = open(filename, 'rb')
+            file_size = os.fstat(file_obj.fileno()).st_size
+            filedata = file_obj.read()
+            if self.do_gzip:
+                # Gzip only if the file is large enough (>1K is recommended)
+                # and only if it is a common text type (not a binary file)
+                if file_size > 1024 and content_type in self.GZIP_CONTENT_TYPES:
+                    filedata = self.compress_string(filedata)
+                    headers['Content-Encoding'] = 'gzip'
+                    if self.verbosity > 1:
+                        print "\tgzipped: %dk to %dk" % \
+                            (file_size / 1024, len(filedata) / 1024)
+            if self.do_expires:
+                # HTTP/1.0: a far-future absolute date (two years out)
+                headers['Expires'] = email.Utils.formatdate(
+                    time.mktime((datetime.datetime.now() +
+                        datetime.timedelta(days=365 * 2)).timetuple()),
+                    usegmt=True)
+                # HTTP/1.1: max-age in seconds (two years)
+                headers['Cache-Control'] = 'max-age=%d' % (3600 * 24 * 365 * 2)
+                if self.verbosity > 1:
+                    print "\texpires: %s" % headers['Expires']
+                    print "\tcache-control: %s" % headers['Cache-Control']
+
+            try:
+                key.name = file_key
+                key.set_contents_from_string(filedata, headers, replace=True)
+                key.set_acl('public-read')
+            except boto.exception.S3CreateError, e:
+                print "Failed: %s" % e
+            except Exception, e:
+                print e
+                raise
+            else:
+                self.upload_count += 1
+
+            file_obj.close()
+
+# Backwards compatibility for Django r9110
+if not [opt for opt in Command.option_list if opt.dest == 'verbosity']:
+    Command.option_list += (
+        optparse.make_option('-v', '--verbosity',
+            dest='verbosity', default=1, action='count',
+            help="Verbose mode. Multiple -v options increase the verbosity."),
+    )
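+
+# For reference, a CSS file uploaded with --gzip and --expires ends up
+# with headers roughly like these (values illustrative):
+#
+#     Content-Type:     text/css
+#     Content-Encoding: gzip
+#     Expires:          Fri, 20 Jan 2012 11:37:40 GMT
+#     Cache-Control:    max-age=63072000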