# -*- coding: utf-8 -*-
from south.utils import datetime_utils as datetime
from south.db import db
from south.v2 import DataMigration
from django.db import models
from django.conf import settings
from hdabo.utils import show_progress
from SPARQLWrapper import SPARQLWrapper, JSON


class Migration(DataMigration):

    def forwards(self, orm):
        "Write your forwards methods here."
        # Note: Don't use "from appname.models import ModelName". 
        # Use orm.ModelName to refer to models in this application,
        # and orm['appname.ModelName'] for models in other applications.
        queryset = orm['hdabo.tag'].objects.all() 
        endpoint = SPARQLWrapper(settings.DBPEDIA_URI_TEMPLATE % ( 'sparql', '' ), returnFormat=JSON)
        
        # First pass with page id
        print "First pass"
        self.update_queryset(endpoint, queryset, "select distinct * where { ?s dbpedia-owl:wikiPageID %s }", "pageid", True)
         
        # Second pass with wikipedia url
        queryset = orm['hdabo.tag'].objects.filter(dbpedia_uri="")
        print "Second pass"
        self.update_queryset(endpoint,  queryset, "select distinct * where { ?s foaf:isPrimaryTopicOf <%s> }")
        
        # Second pass with wikipedia url
        queryset = orm['hdabo.tag'].objects.filter(dbpedia_uri="")
        print "Third pass"
        self.update_queryset(endpoint, queryset, 'select distinct * where { ?s rdfs:label "%s"@fr . FILTER (regex(?s, "^http\\\\://[^:]+$")) }', "label")
        

        print "\n"
    
    def update_queryset(self, endpoint, queryset, query, param="url", empty_after=False):
        queryset_count = queryset.count()
        print "Processing %d tags\n" % (queryset_count)
        for i,tag in enumerate(queryset):
            show_progress(i+1, queryset_count, tag.label, 50)
            if tag.wikipedia_url and tag.wikipedia_url!="":
                p = tag.wikipedia_url
                if param=="pageid":
                    p = tag.wikipedia_pageid
                if param=="label":
                    p = tag.label
                endpoint.setQuery(query % p)
                res = endpoint.queryAndConvert()
                if 'results' in res and 'bindings' in res['results']:
                    l = len(res['results']['bindings'])
                    if l==1:
                        tag.dbpedia_uri = res['results']['bindings'][0]['s']['value']
                        tag.save()
                        #print "1 : " + tag.label.encode("utf8") + " : " + tag.dbpedia_uri.encode("utf8")
                        continue
                    elif l>1:
                        done = False
                        for b in res['results']['bindings']:
                            uri = b['s']['value']
                            uri_label = uri[uri.rfind("/")+1:]
                            if uri_label==tag.label.replace(" ","/"):
                                tag.dbpedia_uri = res['results']['bindings'][0]['s']['value']
                                #print "2 : " + tag.label.encode("utf8") + " : " + tag.dbpedia_uri.encode("utf8")
                                tag.save()
                                done = True
                                continue
                        if done:
                            continue
                        
            if empty_after:
                #print "3 : " + tag.label.encode("utf8")
                tag.dbpedia_uri = ""
                tag.save()
        

    def backwards(self, orm):
        "Write your backwards methods here."

    models = {
        u'auth.group': {
            'Meta': {'object_name': 'Group'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}),
            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': u"orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'})
        },
        u'auth.permission': {
            'Meta': {'ordering': "(u'content_type__app_label', u'content_type__model', u'codename')", 'unique_together': "((u'content_type', u'codename'),)", 'object_name': 'Permission'},
            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['contenttypes.ContentType']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
        },
        u'contenttypes.contenttype': {
            'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
        },
        u'hdabo.author': {
            'Meta': {'object_name': 'Author'},
            'firstname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}),
            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'lastname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'})
        },
        u'hdabo.datasheet': {
            'Meta': {'object_name': 'Datasheet'},
            'author': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Author']", 'null': 'True', 'blank': 'True'}),
            'college_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_periods_datasheets'", 'symmetrical': 'False', 'through': u"orm['hdabo.Datasheet_college_periods']", 'to': u"orm['hdabo.TimePeriod']"}),
            'college_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_themes_datasheets'", 'symmetrical': 'False', 'through': u"orm['hdabo.Datasheet_college_themes']", 'to': u"orm['hdabo.Domain']"}),
            'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
            'domains': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'datasheets'", 'symmetrical': 'False', 'through': u"orm['hdabo.Datasheet_domains']", 'to': u"orm['hdabo.Domain']"}),
            'format': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.DocumentFormat']", 'null': 'True', 'blank': 'True'}),
            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
            'highschool_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_periods_datasheets'", 'symmetrical': 'False', 'through': u"orm['hdabo.Datasheet_highschool_periods']", 'to': u"orm['hdabo.TimePeriod']"}),
            'highschool_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_themes_datasheets'", 'symmetrical': 'False', 'through': u"orm['hdabo.Datasheet_highschool_themes']", 'to': u"orm['hdabo.Domain']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'manual_order': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'modification_datetime': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
            'organisation': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Organisation']", 'null': 'True'}),
            'original_creation_date': ('django.db.models.fields.DateField', [], {}),
            'original_modification_date': ('django.db.models.fields.DateField', [], {}),
            'primary_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_periods_datasheets'", 'symmetrical': 'False', 'through': u"orm['hdabo.Datasheet_primary_periods']", 'to': u"orm['hdabo.TimePeriod']"}),
            'primary_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_themes_datasheets'", 'symmetrical': 'False', 'through': u"orm['hdabo.Datasheet_primary_themes']", 'to': u"orm['hdabo.Domain']"}),
            'tags': ('django.db.models.fields.related.ManyToManyField', [], {'to': u"orm['hdabo.Tag']", 'through': u"orm['hdabo.TaggedSheet']", 'symmetrical': 'False'}),
            'title': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
            'town': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Location']", 'null': 'True', 'blank': 'True'}),
            'url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
            'validated': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'validation_date': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
            'validator': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.User']", 'null': 'True', 'blank': 'True'})
        },
        u'hdabo.datasheet_college_periods': {
            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_periods'},
            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Datasheet']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.TimePeriod']"})
        },
        u'hdabo.datasheet_college_themes': {
            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_themes'},
            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Datasheet']"}),
            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Domain']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'sort_value': ('django.db.models.fields.IntegerField', [], {})
        },
        u'hdabo.datasheet_domains': {
            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_domains'},
            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Datasheet']"}),
            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Domain']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'sort_value': ('django.db.models.fields.IntegerField', [], {})
        },
        u'hdabo.datasheet_highschool_periods': {
            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_periods'},
            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Datasheet']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.TimePeriod']"})
        },
        u'hdabo.datasheet_highschool_themes': {
            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_themes'},
            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Datasheet']"}),
            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Domain']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'sort_value': ('django.db.models.fields.IntegerField', [], {})
        },
        u'hdabo.datasheet_primary_periods': {
            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_periods'},
            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Datasheet']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.TimePeriod']"})
        },
        u'hdabo.datasheet_primary_themes': {
            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_themes'},
            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Datasheet']"}),
            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Domain']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'sort_value': ('django.db.models.fields.IntegerField', [], {})
        },
        u'hdabo.documentformat': {
            'Meta': {'object_name': 'DocumentFormat'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
        },
        u'hdabo.domain': {
            'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'Domain'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
            'school_period': ('django.db.models.fields.IntegerField', [], {})
        },
        u'hdabo.folder': {
            'Meta': {'object_name': 'Folder'},
            'datasheets': ('django.db.models.fields.related.ManyToManyField', [], {'to': u"orm['hdabo.Datasheet']", 'symmetrical': 'False'}),
            'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'title': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
            'url': ('django.db.models.fields.URLField', [], {'unique': 'True', 'max_length': '2048'})
        },
        u'hdabo.location': {
            'Meta': {'object_name': 'Location'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'insee': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '5'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '512'})
        },
        u'hdabo.organisation': {
            'Meta': {'object_name': 'Organisation'},
            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'location': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
            'website': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'})
        },
        u'hdabo.tag': {
            'Meta': {'unique_together': "(('label', 'original_label', 'url_status'),)", 'object_name': 'Tag'},
            'alias': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
            'alternative_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
            'alternative_wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}),
            'alternative_wikipedia_url': ('django.db.models.fields.URLField', [], {'db_index': 'True', 'max_length': '2048', 'null': 'True', 'blank': 'True'}),
            'category': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.TagCategory']", 'null': 'True', 'blank': 'True'}),
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'dbpedia_uri': ('django.db.models.fields.URLField', [], {'db_index': 'True', 'max_length': '2048', 'null': 'True', 'blank': 'True'}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
            'normalized_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
            'original_label': ('django.db.models.fields.CharField', [], {'max_length': '1024'}),
            'popularity': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
            'url_status': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'db_index': 'True', 'blank': 'True'}),
            'wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}),
            'wikipedia_url': ('django.db.models.fields.URLField', [], {'db_index': 'True', 'max_length': '2048', 'null': 'True', 'blank': 'True'})
        },
        u'hdabo.tagcategory': {
            'Meta': {'object_name': 'TagCategory'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
        },
        u'hdabo.taggedsheet': {
            'Meta': {'object_name': 'TaggedSheet'},
            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Datasheet']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'index_note': ('django.db.models.fields.FloatField', [], {'default': '0.0', 'db_index': 'True'}),
            'order': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
            'original_order': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'tag': ('django.db.models.fields.related.ForeignKey', [], {'to': u"orm['hdabo.Tag']"}),
            'wikipedia_revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'})
        },
        u'hdabo.timeperiod': {
            'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'TimePeriod'},
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
            'school_period': ('django.db.models.fields.IntegerField', [], {})
        },
        u'hdabo.user': {
            'Meta': {'object_name': 'User', 'db_table': "'auth_user'"},
            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'symmetrical': 'False', 'related_name': "u'user_set'", 'blank': 'True', 'to': u"orm['auth.Group']"}),
            u'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'symmetrical': 'False', 'related_name': "u'user_set'", 'blank': 'True', 'to': u"orm['auth.Permission']"}),
            'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'})
        }
    }

    complete_apps = ['hdabo']
    symmetrical = True
