web/lib/django_extensions/management/commands/sync_media_s3.py
changeset 3 526ebd3988b0
equal deleted inserted replaced
1:ebaad720f88b 3:526ebd3988b0
       
     1 """
       
     2 Sync Media to S3
       
     3 ================
       
     4 
       
     5 Django command that scans all files in your settings.MEDIA_ROOT folder and
       
     6 uploads them to S3 with the same directory structure.
       
     7 
       
     8 This command can optionally do the following but it is off by default:
       
     9 * gzip compress any CSS and Javascript files it finds and adds the appropriate
       
    10   'Content-Encoding' header.
       
    11 * set a far future 'Expires' header for optimal caching.
       
    12 
       
    13 Note: This script requires the Python boto library and valid Amazon Web
       
    14 Services API keys.
       
    15 
       
    16 Required settings.py variables:
       
    17 AWS_ACCESS_KEY_ID = ''
       
    18 AWS_SECRET_ACCESS_KEY = ''
       
    19 AWS_BUCKET_NAME = ''
       
    20 
       
    21 Command options are:
       
    22   -p PREFIX, --prefix=PREFIX
       
    23                         The prefix to prepend to the path on S3.
       
    24   --gzip                Enables gzipping CSS and Javascript files.
       
    25   --expires             Enables setting a far future expires header.
       
    26   --force               Skip the file mtime check to force upload of all
       
    27                         files.
       
    28   --filter-list         Override default directory and file exclusion
       
    29                         filters. (enter as comma separated line)
       
    30 
       
    31 TODO:
       
    32  * Use fnmatch (or regex) to allow more complex FILTER_LIST rules.
       
    33 
       
    34 """
       
    35 import datetime
       
    36 import email
       
    37 import mimetypes
       
    38 import optparse
       
    39 import os
       
    40 import sys
       
    41 import time
       
    42 
       
    43 from django.conf import settings
       
    44 from django.core.management.base import BaseCommand, CommandError
       
    45 
       
    46 # Make sure boto is available
       
    47 try:
       
    48     import boto
       
    49     import boto.exception
       
    50 except ImportError:
       
    51     raise ImportError, "The boto Python library is not installed."
       
    52 
       
    53 class Command(BaseCommand):
       
    54 
       
    55     # Extra variables to avoid passing these around
       
    56     AWS_ACCESS_KEY_ID = ''
       
    57     AWS_SECRET_ACCESS_KEY = ''
       
    58     AWS_BUCKET_NAME = ''
       
    59     DIRECTORY = ''
       
    60     FILTER_LIST = ['.DS_Store', '.svn', '.hg', '.git', 'Thumbs.db']
       
    61     GZIP_CONTENT_TYPES = (
       
    62         'text/css',
       
    63         'application/javascript',
       
    64         'application/x-javascript'
       
    65     )
       
    66 
       
    67     upload_count = 0
       
    68     skip_count = 0
       
    69 
       
    70     option_list = BaseCommand.option_list + (
       
    71         optparse.make_option('-p', '--prefix',
       
    72             dest='prefix', default='',
       
    73             help="The prefix to prepend to the path on S3."),
       
    74         optparse.make_option('-d', '--dir',
       
    75             dest='dir', default=settings.MEDIA_ROOT,
       
    76             help="The root directory to use instead of your MEDIA_ROOT"),
       
    77         optparse.make_option('--gzip',
       
    78             action='store_true', dest='gzip', default=False,
       
    79             help="Enables gzipping CSS and Javascript files."),
       
    80         optparse.make_option('--expires',
       
    81             action='store_true', dest='expires', default=False,
       
    82             help="Enables setting a far future expires header."),
       
    83         optparse.make_option('--force',
       
    84             action='store_true', dest='force', default=False,
       
    85             help="Skip the file mtime check to force upload of all files."),
       
    86         optparse.make_option('--filter-list', dest='filter_list',
       
    87             action='store', default='',
       
    88             help="Override default directory and file exclusion filters. (enter as comma seperated line)"),
       
    89     )
       
    90 
       
    91     help = 'Syncs the complete MEDIA_ROOT structure and files to S3 into the given bucket name.'
       
    92     args = 'bucket_name'
       
    93 
       
    94     can_import_settings = True
       
    95 
       
    96     def handle(self, *args, **options):
       
    97 
       
    98         # Check for AWS keys in settings
       
    99         if not hasattr(settings, 'AWS_ACCESS_KEY_ID') or \
       
   100            not hasattr(settings, 'AWS_SECRET_ACCESS_KEY'):
       
   101            raise CommandError('Missing AWS keys from settings file.  Please' +
       
   102                      'supply both AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY.')
       
   103         else:
       
   104             self.AWS_ACCESS_KEY_ID = settings.AWS_ACCESS_KEY_ID
       
   105             self.AWS_SECRET_ACCESS_KEY = settings.AWS_SECRET_ACCESS_KEY
       
   106 
       
   107         if not hasattr(settings, 'AWS_BUCKET_NAME'):
       
   108             raise CommandError('Missing bucket name from settings file. Please' +
       
   109                 ' add the AWS_BUCKET_NAME to your settings file.')
       
   110         else:
       
   111             if not settings.AWS_BUCKET_NAME:
       
   112                 raise CommandError('AWS_BUCKET_NAME cannot be empty.')
       
   113         self.AWS_BUCKET_NAME = settings.AWS_BUCKET_NAME
       
   114 
       
   115         if not hasattr(settings, 'MEDIA_ROOT'):
       
   116             raise CommandError('MEDIA_ROOT must be set in your settings.')
       
   117         else:
       
   118             if not settings.MEDIA_ROOT:
       
   119                 raise CommandError('MEDIA_ROOT must be set in your settings.')
       
   120 
       
   121         self.verbosity = int(options.get('verbosity'))
       
   122         self.prefix = options.get('prefix')
       
   123         self.do_gzip = options.get('gzip')
       
   124         self.do_expires = options.get('expires')
       
   125         self.do_force = options.get('force')
       
   126         self.DIRECTORY = options.get('dir')
       
   127         self.FILTER_LIST = getattr(settings, 'FILTER_LIST', self.FILTER_LIST)
       
   128         filter_list = options.get('filter_list').split(',')
       
   129         if filter_list:
       
   130             # command line option overrides default filter_list and
       
   131             # settings.filter_list
       
   132             self.FILTER_LIST = filter_list
       
   133 
       
   134         # Now call the syncing method to walk the MEDIA_ROOT directory and
       
   135         # upload all files found.
       
   136         self.sync_s3()
       
   137 
       
   138         print
       
   139         print "%d files uploaded." % (self.upload_count)
       
   140         print "%d files skipped." % (self.skip_count)
       
   141 
       
   142     def sync_s3(self):
       
   143         """
       
   144         Walks the media directory and syncs files to S3
       
   145         """
       
   146         bucket, key = self.open_s3()
       
   147         os.path.walk(self.DIRECTORY, self.upload_s3,
       
   148             (bucket, key, self.AWS_BUCKET_NAME, self.DIRECTORY))
       
   149 
       
   150     def compress_string(self, s):
       
   151         """Gzip a given string."""
       
   152         import cStringIO, gzip
       
   153         zbuf = cStringIO.StringIO()
       
   154         zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuf)
       
   155         zfile.write(s)
       
   156         zfile.close()
       
   157         return zbuf.getvalue()
       
   158 
       
   159     def open_s3(self):
       
   160         """
       
   161         Opens connection to S3 returning bucket and key
       
   162         """
       
   163         conn = boto.connect_s3(self.AWS_ACCESS_KEY_ID, self.AWS_SECRET_ACCESS_KEY)
       
   164         try:
       
   165             bucket = conn.get_bucket(self.AWS_BUCKET_NAME)
       
   166         except boto.exception.S3ResponseError:
       
   167             bucket = conn.create_bucket(self.AWS_BUCKET_NAME)
       
   168         return bucket, boto.s3.key.Key(bucket)
       
   169 
       
   170     def upload_s3(self, arg, dirname, names):
       
   171         """
       
   172         This is the callback to os.path.walk and where much of the work happens
       
   173         """
       
   174         bucket, key, bucket_name, root_dir = arg # expand arg tuple
       
   175 
       
   176         # Skip directories we don't want to sync
       
   177         if os.path.basename(dirname) in self.FILTER_LIST:
       
   178             # prevent walk from processing subfiles/subdirs below the ignored one
       
   179             del names[:]
       
   180             return 
       
   181 
       
   182         # Later we assume the MEDIA_ROOT ends with a trailing slash
       
   183         if not root_dir.endswith(os.path.sep):
       
   184             root_dir = root_dir + os.path.sep
       
   185 
       
   186         for file in names:
       
   187             headers = {}
       
   188 
       
   189             if file in self.FILTER_LIST:
       
   190                 continue # Skip files we don't want to sync
       
   191 
       
   192             filename = os.path.join(dirname, file)
       
   193             if os.path.isdir(filename):
       
   194                 continue # Don't try to upload directories
       
   195 
       
   196             file_key = filename[len(root_dir):]
       
   197             if self.prefix:
       
   198                 file_key = '%s/%s' % (self.prefix, file_key)
       
   199 
       
   200             # Check if file on S3 is older than local file, if so, upload
       
   201             if not self.do_force:
       
   202                 s3_key = bucket.get_key(file_key)
       
   203                 if s3_key:
       
   204                     s3_datetime = datetime.datetime(*time.strptime(
       
   205                         s3_key.last_modified, '%a, %d %b %Y %H:%M:%S %Z')[0:6])
       
   206                     local_datetime = datetime.datetime.utcfromtimestamp(
       
   207                         os.stat(filename).st_mtime)
       
   208                     if local_datetime < s3_datetime:
       
   209                         self.skip_count += 1
       
   210                         if self.verbosity > 1:
       
   211                             print "File %s hasn't been modified since last " \
       
   212                                 "being uploaded" % (file_key)
       
   213                         continue
       
   214 
       
   215             # File is newer, let's process and upload
       
   216             if self.verbosity > 0:
       
   217                 print "Uploading %s..." % (file_key)
       
   218 
       
   219             content_type = mimetypes.guess_type(filename)[0]
       
   220             if content_type:
       
   221                 headers['Content-Type'] = content_type
       
   222             file_obj = open(filename, 'rb')
       
   223             file_size = os.fstat(file_obj.fileno()).st_size
       
   224             filedata = file_obj.read()
       
   225             if self.do_gzip:
       
   226                 # Gzipping only if file is large enough (>1K is recommended) 
       
   227                 # and only if file is a common text type (not a binary file)
       
   228                 if file_size > 1024 and content_type in self.GZIP_CONTENT_TYPES:
       
   229                     filedata = self.compress_string(filedata)
       
   230                     headers['Content-Encoding'] = 'gzip'
       
   231                     if self.verbosity > 1:
       
   232                         print "\tgzipped: %dk to %dk" % \
       
   233                             (file_size/1024, len(filedata)/1024)
       
   234             if self.do_expires:
       
   235                 # HTTP/1.0
       
   236                 headers['Expires'] = '%s GMT' % (email.Utils.formatdate(
       
   237                     time.mktime((datetime.datetime.now() +
       
   238                     datetime.timedelta(days=365*2)).timetuple())))
       
   239                 # HTTP/1.1
       
   240                 headers['Cache-Control'] = 'max-age %d' % (3600 * 24 * 365 * 2)
       
   241                 if self.verbosity > 1:
       
   242                     print "\texpires: %s" % (headers['Expires'])
       
   243                     print "\tcache-control: %s" % (headers['Cache-Control'])
       
   244 
       
   245             try:
       
   246                 key.name = file_key
       
   247                 key.set_contents_from_string(filedata, headers, replace=True)
       
   248                 key.set_acl('public-read')
       
   249             except boto.s3.connection.S3CreateError, e:
       
   250                 print "Failed: %s" % e
       
   251             except Exception, e:
       
   252                 print e
       
   253                 raise
       
   254             else:
       
   255                 self.upload_count += 1
       
   256 
       
   257             file_obj.close()
       
   258 
       
   259 # Backwards compatibility for Django r9110
       
   260 if not [opt for opt in Command.option_list if opt.dest=='verbosity']:
       
   261     Command.option_list += (
       
   262         optparse.make_option('-v', '--verbosity',
       
   263             dest='verbosity', default=1, action='count',
       
   264             help="Verbose mode. Multiple -v options increase the verbosity."),
       
   265     )