web/lib/django/contrib/gis/utils/layermapping.py
changeset 38 77b6da96e6f1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/lib/django/contrib/gis/utils/layermapping.py	Wed Jun 02 18:57:35 2010 +0200
@@ -0,0 +1,602 @@
+# LayerMapping -- A Django Model/OGR Layer Mapping Utility
+"""
+ The LayerMapping class provides a way to map the contents of OGR
+ vector files (e.g. SHP files) to Geographic-enabled Django models.
+
+ For more information, please consult the GeoDjango documentation:
+   http://geodjango.org/docs/layermapping.html
+"""
+import sys
+from datetime import date, datetime
+from decimal import Decimal
+from django.core.exceptions import ObjectDoesNotExist
+from django.db import connections, DEFAULT_DB_ALIAS
+from django.contrib.gis.db.models import GeometryField
+from django.contrib.gis.gdal import CoordTransform, DataSource, \
+    OGRException, OGRGeometry, OGRGeomType, SpatialReference
+from django.contrib.gis.gdal.field import \
+    OFTDate, OFTDateTime, OFTInteger, OFTReal, OFTString, OFTTime
+from django.db import models, transaction
+from django.contrib.localflavor.us.models import USStateField
+
+# LayerMapping exceptions.
+class LayerMapError(Exception): pass
+class InvalidString(LayerMapError): pass
+class InvalidDecimal(LayerMapError): pass
+class InvalidInteger(LayerMapError): pass
+class MissingForeignKey(LayerMapError): pass
+
+class LayerMapping(object):
+    "A class that maps OGR Layers to GeoDjango Models."
+
+    # Acceptable 'base' types for a multi-geometry type.
+    MULTI_TYPES = {1 : OGRGeomType('MultiPoint'),
+                   2 : OGRGeomType('MultiLineString'),
+                   3 : OGRGeomType('MultiPolygon'),
+                   OGRGeomType('Point25D').num : OGRGeomType('MultiPoint25D'),
+                   OGRGeomType('LineString25D').num : OGRGeomType('MultiLineString25D'),
+                   OGRGeomType('Polygon25D').num : OGRGeomType('MultiPolygon25D'),
+                   }
+
+    # Acceptable Django field types and corresponding acceptable OGR
+    # counterparts.
+    FIELD_TYPES = {
+        models.AutoField : OFTInteger,
+        models.IntegerField : (OFTInteger, OFTReal, OFTString),
+        models.FloatField : (OFTInteger, OFTReal),
+        models.DateField : OFTDate,
+        models.DateTimeField : OFTDateTime,
+        models.EmailField : OFTString,
+        models.TimeField : OFTTime,
+        models.DecimalField : (OFTInteger, OFTReal),
+        models.CharField : OFTString,
+        models.SlugField : OFTString,
+        models.TextField : OFTString,
+        models.URLField : OFTString,
+        USStateField : OFTString,
+        models.XMLField : OFTString,
+        models.SmallIntegerField : (OFTInteger, OFTReal, OFTString),
+        models.PositiveSmallIntegerField : (OFTInteger, OFTReal, OFTString),
+        }
+
+    # The acceptable transaction modes.
+    TRANSACTION_MODES = {'autocommit' : transaction.autocommit,
+                         'commit_on_success' : transaction.commit_on_success,
+                         }
+
+    def __init__(self, model, data, mapping, layer=0,
+                 source_srs=None, encoding=None,
+                 transaction_mode='commit_on_success',
+                 transform=True, unique=None, using=DEFAULT_DB_ALIAS):
+        """
+        A LayerMapping object is initialized using the given Model (not an instance),
+        a DataSource (or string path to an OGR-supported data file), and a mapping
+        dictionary.  See the module level docstring for more details and keyword
+        argument usage.
+        """
+        # Getting the DataSource and the associated Layer.
+        if isinstance(data, basestring):
+            self.ds = DataSource(data)
+        else:
+            self.ds = data
+        self.layer = self.ds[layer]
+
+        self.using = using
+        self.spatial_backend = connections[using].ops
+
+        # Setting the mapping & model attributes.
+        self.mapping = mapping
+        self.model = model
+
+        # Checking the layer -- intitialization of the object will fail if
+        # things don't check out before hand.
+        self.check_layer()
+
+        # Getting the geometry column associated with the model (an
+        # exception will be raised if there is no geometry column).
+        if self.spatial_backend.mysql:
+            transform = False
+        else:
+            self.geo_field = self.geometry_field()
+
+        # Checking the source spatial reference system, and getting
+        # the coordinate transformation object (unless the `transform`
+        # keyword is set to False)
+        if transform:
+            self.source_srs = self.check_srs(source_srs)
+            self.transform = self.coord_transform()
+        else:
+            self.transform = transform
+
+        # Setting the encoding for OFTString fields, if specified.
+        if encoding:
+            # Making sure the encoding exists, if not a LookupError
+            # exception will be thrown.
+            from codecs import lookup
+            lookup(encoding)
+            self.encoding = encoding
+        else:
+            self.encoding = None
+
+        if unique:
+            self.check_unique(unique)
+            transaction_mode = 'autocommit' # Has to be set to autocommit.
+            self.unique = unique
+        else:
+            self.unique = None
+
+        # Setting the transaction decorator with the function in the
+        # transaction modes dictionary.
+        if transaction_mode in self.TRANSACTION_MODES:
+            self.transaction_decorator = self.TRANSACTION_MODES[transaction_mode]
+            self.transaction_mode = transaction_mode
+        else:
+            raise LayerMapError('Unrecognized transaction mode: %s' % transaction_mode)
+
+        if using is None:
+            pass
+
+    #### Checking routines used during initialization ####
+    def check_fid_range(self, fid_range):
+        "This checks the `fid_range` keyword."
+        if fid_range:
+            if isinstance(fid_range, (tuple, list)):
+                return slice(*fid_range)
+            elif isinstance(fid_range, slice):
+                return fid_range
+            else:
+                raise TypeError
+        else:
+            return None
+
+    def check_layer(self):
+        """
+        This checks the Layer metadata, and ensures that it is compatible
+        with the mapping information and model.  Unlike previous revisions,
+        there is no need to increment through each feature in the Layer.
+        """
+        # The geometry field of the model is set here.
+        # TODO: Support more than one geometry field / model.  However, this
+        # depends on the GDAL Driver in use.
+        self.geom_field = False
+        self.fields = {}
+
+        # Getting lists of the field names and the field types available in
+        # the OGR Layer.
+        ogr_fields = self.layer.fields
+        ogr_field_types = self.layer.field_types
+
+        # Function for determining if the OGR mapping field is in the Layer.
+        def check_ogr_fld(ogr_map_fld):
+            try:
+                idx = ogr_fields.index(ogr_map_fld)
+            except ValueError:
+                raise LayerMapError('Given mapping OGR field "%s" not found in OGR Layer.' % ogr_map_fld)
+            return idx
+
+        # No need to increment through each feature in the model, simply check
+        # the Layer metadata against what was given in the mapping dictionary.
+        for field_name, ogr_name in self.mapping.items():
+            # Ensuring that a corresponding field exists in the model
+            # for the given field name in the mapping.
+            try:
+                model_field = self.model._meta.get_field(field_name)
+            except models.fields.FieldDoesNotExist:
+                raise LayerMapError('Given mapping field "%s" not in given Model fields.' % field_name)
+
+            # Getting the string name for the Django field class (e.g., 'PointField').
+            fld_name = model_field.__class__.__name__
+
+            if isinstance(model_field, GeometryField):
+                if self.geom_field:
+                    raise LayerMapError('LayerMapping does not support more than one GeometryField per model.')
+
+                # Getting the coordinate dimension of the geometry field.
+                coord_dim = model_field.dim
+
+                try:
+                    if coord_dim == 3:
+                        gtype = OGRGeomType(ogr_name + '25D')
+                    else:
+                        gtype = OGRGeomType(ogr_name)
+                except OGRException:
+                    raise LayerMapError('Invalid mapping for GeometryField "%s".' % field_name)
+
+                # Making sure that the OGR Layer's Geometry is compatible.
+                ltype = self.layer.geom_type
+                if not (ltype.name.startswith(gtype.name) or self.make_multi(ltype, model_field)):
+                    raise LayerMapError('Invalid mapping geometry; model has %s%s, '
+                                        'layer geometry type is %s.' %
+                                        (fld_name, (coord_dim == 3 and '(dim=3)') or '', ltype))
+
+                # Setting the `geom_field` attribute w/the name of the model field
+                # that is a Geometry.  Also setting the coordinate dimension
+                # attribute.
+                self.geom_field = field_name
+                self.coord_dim = coord_dim
+                fields_val = model_field
+            elif isinstance(model_field, models.ForeignKey):
+                if isinstance(ogr_name, dict):
+                    # Is every given related model mapping field in the Layer?
+                    rel_model = model_field.rel.to
+                    for rel_name, ogr_field in ogr_name.items():
+                        idx = check_ogr_fld(ogr_field)
+                        try:
+                            rel_field = rel_model._meta.get_field(rel_name)
+                        except models.fields.FieldDoesNotExist:
+                            raise LayerMapError('ForeignKey mapping field "%s" not in %s fields.' %
+                                                (rel_name, rel_model.__class__.__name__))
+                    fields_val = rel_model
+                else:
+                    raise TypeError('ForeignKey mapping must be of dictionary type.')
+            else:
+                # Is the model field type supported by LayerMapping?
+                if not model_field.__class__ in self.FIELD_TYPES:
+                    raise LayerMapError('Django field type "%s" has no OGR mapping (yet).' % fld_name)
+
+                # Is the OGR field in the Layer?
+                idx = check_ogr_fld(ogr_name)
+                ogr_field = ogr_field_types[idx]
+
+                # Can the OGR field type be mapped to the Django field type?
+                if not issubclass(ogr_field, self.FIELD_TYPES[model_field.__class__]):
+                    raise LayerMapError('OGR field "%s" (of type %s) cannot be mapped to Django %s.' %
+                                        (ogr_field, ogr_field.__name__, fld_name))
+                fields_val = model_field
+
+            self.fields[field_name] = fields_val
+
+    def check_srs(self, source_srs):
+        "Checks the compatibility of the given spatial reference object."
+
+        if isinstance(source_srs, SpatialReference):
+            sr = source_srs
+        elif isinstance(source_srs, self.spatial_backend.spatial_ref_sys()):
+            sr = source_srs.srs
+        elif isinstance(source_srs, (int, basestring)):
+            sr = SpatialReference(source_srs)
+        else:
+            # Otherwise just pulling the SpatialReference from the layer
+            sr = self.layer.srs
+
+        if not sr:
+            raise LayerMapError('No source reference system defined.')
+        else:
+            return sr
+
+    def check_unique(self, unique):
+        "Checks the `unique` keyword parameter -- may be a sequence or string."
+        if isinstance(unique, (list, tuple)):
+            # List of fields to determine uniqueness with
+            for attr in unique:
+                if not attr in self.mapping: raise ValueError
+        elif isinstance(unique, basestring):
+            # Only a single field passed in.
+            if unique not in self.mapping: raise ValueError
+        else:
+            raise TypeError('Unique keyword argument must be set with a tuple, list, or string.')
+
+    #### Keyword argument retrieval routines ####
+    def feature_kwargs(self, feat):
+        """
+        Given an OGR Feature, this will return a dictionary of keyword arguments
+        for constructing the mapped model.
+        """
+        # The keyword arguments for model construction.
+        kwargs = {}
+
+        # Incrementing through each model field and OGR field in the
+        # dictionary mapping.
+        for field_name, ogr_name in self.mapping.items():
+            model_field = self.fields[field_name]
+
+            if isinstance(model_field, GeometryField):
+                # Verify OGR geometry.
+                val = self.verify_geom(feat.geom, model_field)
+            elif isinstance(model_field, models.base.ModelBase):
+                # The related _model_, not a field was passed in -- indicating
+                # another mapping for the related Model.
+                val = self.verify_fk(feat, model_field, ogr_name)
+            else:
+                # Otherwise, verify OGR Field type.
+                val = self.verify_ogr_field(feat[ogr_name], model_field)
+
+            # Setting the keyword arguments for the field name with the
+            # value obtained above.
+            kwargs[field_name] = val
+
+        return kwargs
+
+    def unique_kwargs(self, kwargs):
+        """
+        Given the feature keyword arguments (from `feature_kwargs`) this routine
+        will construct and return the uniqueness keyword arguments -- a subset
+        of the feature kwargs.
+        """
+        if isinstance(self.unique, basestring):
+            return {self.unique : kwargs[self.unique]}
+        else:
+            return dict((fld, kwargs[fld]) for fld in self.unique)
+
+    #### Verification routines used in constructing model keyword arguments. ####
+    def verify_ogr_field(self, ogr_field, model_field):
+        """
+        Verifies if the OGR Field contents are acceptable to the Django
+        model field.  If they are, the verified value is returned,
+        otherwise the proper exception is raised.
+        """
+        if (isinstance(ogr_field, OFTString) and
+            isinstance(model_field, (models.CharField, models.TextField))):
+            if self.encoding:
+                # The encoding for OGR data sources may be specified here
+                # (e.g., 'cp437' for Census Bureau boundary files).
+                val = unicode(ogr_field.value, self.encoding)
+            else:
+                val = ogr_field.value
+                if len(val) > model_field.max_length:
+                    raise InvalidString('%s model field maximum string length is %s, given %s characters.' %
+                                        (model_field.name, model_field.max_length, len(val)))
+        elif isinstance(ogr_field, OFTReal) and isinstance(model_field, models.DecimalField):
+            try:
+                # Creating an instance of the Decimal value to use.
+                d = Decimal(str(ogr_field.value))
+            except:
+                raise InvalidDecimal('Could not construct decimal from: %s' % ogr_field.value)
+
+            # Getting the decimal value as a tuple.
+            dtup = d.as_tuple()
+            digits = dtup[1]
+            d_idx = dtup[2] # index where the decimal is
+
+            # Maximum amount of precision, or digits to the left of the decimal.
+            max_prec = model_field.max_digits - model_field.decimal_places
+
+            # Getting the digits to the left of the decimal place for the
+            # given decimal.
+            if d_idx < 0:
+                n_prec = len(digits[:d_idx])
+            else:
+                n_prec = len(digits) + d_idx
+
+            # If we have more than the maximum digits allowed, then throw an
+            # InvalidDecimal exception.
+            if n_prec > max_prec:
+                raise InvalidDecimal('A DecimalField with max_digits %d, decimal_places %d must round to an absolute value less than 10^%d.' %
+                                     (model_field.max_digits, model_field.decimal_places, max_prec))
+            val = d
+        elif isinstance(ogr_field, (OFTReal, OFTString)) and isinstance(model_field, models.IntegerField):
+            # Attempt to convert any OFTReal and OFTString value to an OFTInteger.
+            try:
+                val = int(ogr_field.value)
+            except:
+                raise InvalidInteger('Could not construct integer from: %s' % ogr_field.value)
+        else:
+            val = ogr_field.value
+        return val
+
+    def verify_fk(self, feat, rel_model, rel_mapping):
+        """
+        Given an OGR Feature, the related model and its dictionary mapping,
+        this routine will retrieve the related model for the ForeignKey
+        mapping.
+        """
+        # TODO: It is expensive to retrieve a model for every record --
+        #  explore if an efficient mechanism exists for caching related
+        #  ForeignKey models.
+
+        # Constructing and verifying the related model keyword arguments.
+        fk_kwargs = {}
+        for field_name, ogr_name in rel_mapping.items():
+            fk_kwargs[field_name] = self.verify_ogr_field(feat[ogr_name], rel_model._meta.get_field(field_name))
+
+        # Attempting to retrieve and return the related model.
+        try:
+            return rel_model.objects.get(**fk_kwargs)
+        except ObjectDoesNotExist:
+            raise MissingForeignKey('No ForeignKey %s model found with keyword arguments: %s' % (rel_model.__name__, fk_kwargs))
+
+    def verify_geom(self, geom, model_field):
+        """
+        Verifies the geometry -- will construct and return a GeometryCollection
+        if necessary (for example if the model field is MultiPolygonField while
+        the mapped shapefile only contains Polygons).
+        """
+        # Downgrade a 3D geom to a 2D one, if necessary.
+        if self.coord_dim != geom.coord_dim:
+            geom.coord_dim = self.coord_dim
+
+        if self.make_multi(geom.geom_type, model_field):
+            # Constructing a multi-geometry type to contain the single geometry
+            multi_type = self.MULTI_TYPES[geom.geom_type.num]
+            g = OGRGeometry(multi_type)
+            g.add(geom)
+        else:
+            g = geom
+
+        # Transforming the geometry with our Coordinate Transformation object,
+        # but only if the class variable `transform` is set w/a CoordTransform
+        # object.
+        if self.transform: g.transform(self.transform)
+
+        # Returning the WKT of the geometry.
+        return g.wkt
+
+    #### Other model methods ####
+    def coord_transform(self):
+        "Returns the coordinate transformation object."
+        SpatialRefSys = self.spatial_backend.spatial_ref_sys()
+        try:
+            # Getting the target spatial reference system
+            target_srs = SpatialRefSys.objects.get(srid=self.geo_field.srid).srs
+
+            # Creating the CoordTransform object
+            return CoordTransform(self.source_srs, target_srs)
+        except Exception, msg:
+            raise LayerMapError('Could not translate between the data source and model geometry: %s' % msg)
+
+    def geometry_field(self):
+        "Returns the GeometryField instance associated with the geographic column."
+        # Use the `get_field_by_name` on the model's options so that we
+        # get the correct field instance if there's model inheritance.
+        opts = self.model._meta
+        fld, model, direct, m2m = opts.get_field_by_name(self.geom_field)
+        return fld
+
+    def make_multi(self, geom_type, model_field):
+        """
+        Given the OGRGeomType for a geometry and its associated GeometryField,
+        determine whether the geometry should be turned into a GeometryCollection.
+        """
+        return (geom_type.num in self.MULTI_TYPES and
+                model_field.__class__.__name__ == 'Multi%s' % geom_type.django)
+
+    def save(self, verbose=False, fid_range=False, step=False,
+             progress=False, silent=False, stream=sys.stdout, strict=False):
+        """
+        Saves the contents from the OGR DataSource Layer into the database
+        according to the mapping dictionary given at initialization.
+
+        Keyword Parameters:
+         verbose:
+           If set, information will be printed subsequent to each model save
+           executed on the database.
+
+         fid_range:
+           May be set with a slice or tuple of (begin, end) feature ID's to map
+           from the data source.  In other words, this keyword enables the user
+           to selectively import a subset range of features in the geographic
+           data source.
+
+         step:
+           If set with an integer, transactions will occur at every step
+           interval. For example, if step=1000, a commit would occur after
+           the 1,000th feature, the 2,000th feature etc.
+
+         progress:
+           When this keyword is set, status information will be printed giving
+           the number of features processed and sucessfully saved.  By default,
+           progress information will pe printed every 1000 features processed,
+           however, this default may be overridden by setting this keyword with an
+           integer for the desired interval.
+
+         stream:
+           Status information will be written to this file handle.  Defaults to
+           using `sys.stdout`, but any object with a `write` method is supported.
+
+         silent:
+           By default, non-fatal error notifications are printed to stdout, but
+           this keyword may be set to disable these notifications.
+
+         strict:
+           Execution of the model mapping will cease upon the first error
+           encountered.  The default behavior is to attempt to continue.
+        """
+        # Getting the default Feature ID range.
+        default_range = self.check_fid_range(fid_range)
+
+        # Setting the progress interval, if requested.
+        if progress:
+            if progress is True or not isinstance(progress, int):
+                progress_interval = 1000
+            else:
+                progress_interval = progress
+
+        # Defining the 'real' save method, utilizing the transaction
+        # decorator created during initialization.
+        @self.transaction_decorator
+        def _save(feat_range=default_range, num_feat=0, num_saved=0):
+            if feat_range:
+                layer_iter = self.layer[feat_range]
+            else:
+                layer_iter = self.layer
+
+            for feat in layer_iter:
+                num_feat += 1
+                # Getting the keyword arguments
+                try:
+                    kwargs = self.feature_kwargs(feat)
+                except LayerMapError, msg:
+                    # Something borked the validation
+                    if strict: raise
+                    elif not silent:
+                        stream.write('Ignoring Feature ID %s because: %s\n' % (feat.fid, msg))
+                else:
+                    # Constructing the model using the keyword args
+                    is_update = False
+                    if self.unique:
+                        # If we want unique models on a particular field, handle the
+                        # geometry appropriately.
+                        try:
+                            # Getting the keyword arguments and retrieving
+                            # the unique model.
+                            u_kwargs = self.unique_kwargs(kwargs)
+                            m = self.model.objects.using(self.using).get(**u_kwargs)
+                            is_update = True
+
+                            # Getting the geometry (in OGR form), creating
+                            # one from the kwargs WKT, adding in additional
+                            # geometries, and update the attribute with the
+                            # just-updated geometry WKT.
+                            geom = getattr(m, self.geom_field).ogr
+                            new = OGRGeometry(kwargs[self.geom_field])
+                            for g in new: geom.add(g)
+                            setattr(m, self.geom_field, geom.wkt)
+                        except ObjectDoesNotExist:
+                            # No unique model exists yet, create.
+                            m = self.model(**kwargs)
+                    else:
+                        m = self.model(**kwargs)
+
+                    try:
+                        # Attempting to save.
+                        m.save(using=self.using)
+                        num_saved += 1
+                        if verbose: stream.write('%s: %s\n' % (is_update and 'Updated' or 'Saved', m))
+                    except SystemExit:
+                        raise
+                    except Exception, msg:
+                        if self.transaction_mode == 'autocommit':
+                            # Rolling back the transaction so that other model saves
+                            # will work.
+                            transaction.rollback_unless_managed()
+                        if strict:
+                            # Bailing out if the `strict` keyword is set.
+                            if not silent:
+                                stream.write('Failed to save the feature (id: %s) into the model with the keyword arguments:\n' % feat.fid)
+                                stream.write('%s\n' % kwargs)
+                            raise
+                        elif not silent:
+                            stream.write('Failed to save %s:\n %s\nContinuing\n' % (kwargs, msg))
+
+                # Printing progress information, if requested.
+                if progress and num_feat % progress_interval == 0:
+                    stream.write('Processed %d features, saved %d ...\n' % (num_feat, num_saved))
+
+            # Only used for status output purposes -- incremental saving uses the
+            # values returned here.
+            return num_saved, num_feat
+
+        nfeat = self.layer.num_feat
+        if step and isinstance(step, int) and step < nfeat:
+            # Incremental saving is requested at the given interval (step)
+            if default_range:
+                raise LayerMapError('The `step` keyword may not be used in conjunction with the `fid_range` keyword.')
+            beg, num_feat, num_saved = (0, 0, 0)
+            indices = range(step, nfeat, step)
+            n_i = len(indices)
+
+            for i, end in enumerate(indices):
+                # Constructing the slice to use for this step; the last slice is
+                # special (e.g, [100:] instead of [90:100]).
+                if i+1 == n_i: step_slice = slice(beg, None)
+                else: step_slice = slice(beg, end)
+
+                try:
+                    num_feat, num_saved = _save(step_slice, num_feat, num_saved)
+                    beg = end
+                except:
+                    stream.write('%s\nFailed to save slice: %s\n' % ('=-' * 20, step_slice))
+                    raise
+        else:
+            # Otherwise, just calling the previously defined _save() function.
+            _save()