query wp for categories and infoboxes
author ymh <ymh.work@gmail.com>
Tue, 17 Jan 2012 00:18:49 +0100
changeset 111 ceb381f5b0c7
parent 108 4b73a767a6c0
child 112 e7086d345a7c
query wp for categories and infoboxes
.settings/org.eclipse.core.resources.prefs
web/hdabo/management/commands/query_wikipedia.py
web/hdabo/management/commands/query_wikipedia_category.py
web/hdabo/migrations/0003_update_redirection.py
web/hdabo/migrations/0004_wp_category.py
web/hdabo/migrations/0005_wp_db_indexes.py
web/hdabo/models.py
web/hdabo/utils.py
web/hdabo/wp_utils.py
--- a/.settings/org.eclipse.core.resources.prefs	Mon Jan 09 03:19:43 2012 +0100
+++ b/.settings/org.eclipse.core.resources.prefs	Tue Jan 17 00:18:49 2012 +0100
@@ -1,4 +1,4 @@
-#Mon Jan 09 03:01:43 CET 2012
+#Mon Jan 16 02:39:01 CET 2012
 eclipse.preferences.version=1
 encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/haystack/backends/__init__.py=utf-8
 encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/fields.py=utf-8
@@ -10,8 +10,11 @@
 encoding//web/hdabo/management/commands/import_csv.py=utf-8
 encoding//web/hdabo/management/commands/import_tag_popularity.py=utf-8
 encoding//web/hdabo/management/commands/query_wikipedia.py=utf-8
+encoding//web/hdabo/management/commands/query_wikipedia_category.py=utf-8
 encoding//web/hdabo/migrations/0001_initial.py=utf-8
 encoding//web/hdabo/migrations/0002_backport_hdabo_sf.py=utf-8
+encoding//web/hdabo/migrations/0003_update_redirection.py=utf-8
+encoding//web/hdabo/migrations/0004_wp_category.py=utf-8
 encoding//web/hdabo/models.py=utf-8
 encoding//web/hdabo/search/french_whoosh_backend.py=utf-8
 encoding//web/hdabo/tests/models.py=utf-8
--- a/web/hdabo/management/commands/query_wikipedia.py	Mon Jan 09 03:19:43 2012 +0100
+++ b/web/hdabo/management/commands/query_wikipedia.py	Tue Jan 17 00:18:49 2012 +0100
@@ -61,34 +61,6 @@
                 return True
         return False
         
-
-    def process_wp_response(self, label, response):
-        
-
-        query_dict = response['query']
-        # get page if multiple pages or none -> return Tag.null_result
-        pages = query_dict.get("pages", {})
-        if len(pages) > 1 or len(pages) == 0:
-            return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
-        
-        page = pages.values()[0]
-        
-        if u"invalid" in page or u"missing" in page:
-            return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
-
-        url = page.get(u'fullurl', None)
-        pageid = page.get(u'pageid', None)
-        new_label = page[u'title']
-        
-        if self.__is_homonymie(page):
-            status = Tag.TAG_URL_STATUS_DICT["homonyme"]
-        elif u"redirect" in page:
-            status = Tag.TAG_URL_STATUS_DICT["redirection"]
-        else:
-            status = Tag.TAG_URL_STATUS_DICT["match"]
-        
-        return new_label, status, url, pageid 
-
     def show_progress(self, current_line, total_line, label, width):
 
         percent = (float(current_line) / float(total_line)) * 100.0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdabo/management/commands/query_wikipedia_category.py	Tue Jan 17 00:18:49 2012 +0100
@@ -0,0 +1,362 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jun 7, 2011
+
+@author: ymh
+'''
+
+from django.conf import settings
+from django.core.management.base import NoArgsCommand
+from django.core.management.color import no_style
+from hdabo.models import Tag, WpCategory, TagWpCategory, TagInfobox, InfoboxParameter
+from optparse import make_option
+from wikitools import api,wiki
+import sys
+import re
+import itertools
+from hdabo import utils
+from django.db.models import Count
+from django.db import transaction
+
+
+TYPES_MASK_DICT = {  # one bit per --type choice; 'all' has the three bits set
+        u'visible': 0b001,
+        u'hidden': 0b010,
+        u'infobox': 0b100,
+        u'all': 0b111,
+    }
+# Patterns used to find infoboxes in wikitext and to split them into parameters.
+START_PATTERN = re.compile(u"\{\{\s?Infobox\s+([^|]+)", re.M|re.U|re.I)  # "{{Infobox <name>" (any case); group 1 = the text up to the first "|"
+END_PATTERN = re.compile(u"\{\{|\}\}", re.M|re.U)  # the next "{{" or "}}"
+SPLIT_PATTERN = re.compile("\s*?\|\s*([\w]+[\w \t-]*)\s*=", re.U|re.M)  # "| key ="; group 1 = the key
+DELIMITER_PATTERN = re.compile("\{{2,3}|\}{2,3}|\[\[|\]\]|\[|\]")  # 2 or 3 braces, "[[", "]]", "[" or "]"
+
+
+
+class Command(NoArgsCommand):
+    '''
+    query and update wikipedia for tag title.
+    '''
+    options = ''
+    help = """query and update wikipedia for tag title."""
+    
+    # Command line options; --type may be given several times (its values are OR-ed into a mask in handle_noargs).
+    option_list = NoArgsCommand.option_list + (
+        make_option('--all',
+            action='store_true',
+            dest='all',
+            default=False,
+            help='force all tags to be updated, not only those not yet processed'),
+        make_option('--force',
+            action='store_true',
+            dest='force',
+            default=False,
+            help='ask no questions'),
+        make_option('--random',
+            action='store_true',
+            dest='random',
+            default=False,
+            help='randomize query on tags'),
+        make_option('--site',
+            action='store',
+            type='string',
+            dest='site_url',
+            default="http://fr.wikipedia.org/w/api.php",
+            help='the url for the wikipedia site'),
+        make_option('--limit',
+            action='store',
+            type='int',
+            dest='limit',
+            default= -1,
+            help='number of tag to process'),
+        make_option('--start',
+            action='store',
+            type='int',
+            dest='start',
+            default=0,
+            help='number of tag to ignore'),
+        make_option('--type',
+            action='append',
+            dest='types',
+            type='choice',
+            choices=['visible','hidden', 'infobox', 'all'],
+            default=[],
+            help='what type of query to perform : visible : visible categories, hidden : hidden categories, infobox: infoboxes, all: all of them. This option can be passed multiple times'),
+        )
+    
+    
+#    def process_wp_response(self, label, response):        
+#
+#        query_dict = response['query']
+#        # get page if multiple pages or none -> return Tag.null_result
+#        pages = query_dict.get("pages", {})
+#        if len(pages) > 1 or len(pages) == 0:
+#            return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
+#        
+#        page = pages.values()[0]
+#        
+#        if u"invalid" in page or u"missing" in page:
+#            return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
+#
+#        url = page.get(u'fullurl', None)
+#        pageid = page.get(u'pageid', None)
+#        new_label = page[u'title']
+#        
+#        if self.__is_homonymie(page):
+#            status = Tag.TAG_URL_STATUS_DICT["homonyme"]
+#        elif u"redirect" in page:
+#            status = Tag.TAG_URL_STATUS_DICT["redirection"]
+#        else:
+#            status = Tag.TAG_URL_STATUS_DICT["match"]
+#        
+#        return new_label, status, url, pageid 
+
+    def query_all_categories(self, hidden, site, pageid):
+        """Return the category titles of page ``pageid``, following API continuation.
+
+        ``hidden`` selects the hidden (True) or the non hidden (False)
+        categories. The text up to the first ":" of each title is removed.
+        An unexpected response (no 'query' entry, or not exactly one page)
+        ends the collection and returns what was gathered so far.
+        """
+        params = {'action':'query', 'pageids': pageid, 'prop':'categories', 'clshow': 'hidden' if hidden else '!hidden'}
+        titles = []
+        clcontinue = ""
+
+        while True:
+            if clcontinue:
+                params['clcontinue'] = clcontinue
+
+            response = api.APIRequest(site, params).query() #@UndefinedVariable
+            query_dict = response.get('query', None)
+            pages = {} if query_dict is None else query_dict.get("pages", {})
+            if len(pages) != 1:
+                break
+
+            for cat in pages.values()[0].get('categories',[]):
+                full_title = cat.get('title',"")
+                title = full_title[full_title.find(":")+1:]
+                # entries matching the current continuation marker are not kept
+                if title and clcontinue != ("%s|%s" % (pageid,title)):
+                    titles.append(title)
+
+            continuation = response.get('query-continue', {}).get('categories',{})
+            clcontinue = continuation.get('clcontinue', None)
+            if clcontinue is None:
+                break
+
+        return titles
+    
+    def process_categories(self, cat_list, hidden, tag):
+        """Get or create a WpCategory per label of ``cat_list`` and link each one to ``tag``."""
+        for label in cat_list:
+            category = WpCategory.objects.get_or_create(label=label)[0]
+            TagWpCategory.objects.get_or_create(tag=tag, wp_category=category, hidden=hidden)
+            
+                
+    def query_infoboxes(self, site, pageid):
+        """Fetch the wikitext of page ``pageid`` and extract its "{{Infobox ...}}" blocks.
+
+        Returns ``(revid, [(infobox_source, infobox_name), ...])``, or an empty
+        list when the response holds no 'query' entry, not exactly one page,
+        or no revision.
+        """
+        res = []
+        params = {'action':'query', 'pageids': pageid, 'prop':'revisions', 'rvprop': 'ids|content'}
+        wpquery = api.APIRequest(site, params) #@UndefinedVariable
+        response = wpquery.query()
+
+        query_dict = response.get('query', None)
+        if query_dict is None:
+            return res
+
+        pages = query_dict.get("pages", {})
+        if len(pages) != 1:
+            return res
+
+        page = pages.values()[0]
+        if not page.get('revisions'):
+            return res
+
+        rev = page['revisions'][0]
+        content = rev['*']
+        # depth counts the "{{" still open inside the infobox being read;
+        # 0 means that the next infobox start is being looked for.
+        start = 0
+        depth = 0
+        current_infobox_name = None
+        current_start = 0
+        while start <= len(content):
+            if depth == 0:
+                resm = START_PATTERN.search(content, start)
+                if resm is None:
+                    break
+                depth = 1
+                current_start = resm.start()
+                current_infobox_name = resm.group(1)
+            else:
+                resm = END_PATTERN.search(content, start)
+                if resm is None:
+                    break
+                depth += 1 if resm.group(0) == "{{" else -1
+                if depth == 0:
+                    res.append((content[current_start:resm.end()], current_infobox_name))
+            # Resume right after the match: skipping one more character would
+            # miss a "}}" that directly follows, as in "{{b}}}}".
+            # NOTE(review): a "{{{" or "}}}" run is read as a pair plus one stray brace.
+            start = resm.end()
+
+        return rev['revid'],res
+    
+    def split_infoboxes(self, src):
+        """Split infobox source text into a flat list [key, value, key, value, ...]."""
+        start = 0
+        previous_end = 0
+        split_indexes = []
+        delimiter_stack = []
+        while start<=len(src):  # start is the offset from which the next searches are made
+            resd = DELIMITER_PATTERN.search(src[start:])
+            ress = SPLIT_PATTERN.search(src[start:]) if len(delimiter_stack) == 0 else None  # "| key =" is only looked for when no delimiter is open
+            startd = resd.start() if resd is not None else sys.maxint
+            starts = ress.start() if ress is not None else sys.maxint
+            if starts < startd:  # a key separator comes before the next delimiter
+                if len(split_indexes)>0:
+                    split_indexes.append((previous_end, ress.start(0)+start))  # span of the value of the previous key
+                split_indexes.append((ress.start(1)+start, ress.end(1)+start))  # span of the key itself
+                start += ress.end(0)
+                previous_end = start
+            elif startd < sys.maxint:  # a delimiter comes first
+                if resd.group().startswith("{") or resd.group().startswith("[") :
+                    delimiter_stack.append(resd.group())
+                elif len(delimiter_stack)>0 and ( (delimiter_stack[-1].startswith('{') and resd.group()[0] == '}') or (delimiter_stack[-1].startswith('[') and resd.group()[0] == ']') ) and len(delimiter_stack[-1]) == len(resd.group()):  # closes the delimiter on top of the stack (same kind, same length)
+                    delimiter_stack.pop()
+                start += resd.end()
+            else:
+                break
+        # the text after the last key found is the value of that key
+        if previous_end > 0:
+            split_indexes.append((previous_end,len(src)))
+        res = [src[start:end] for start,end in split_indexes]
+        return res
+
+
+
+    def process_infoboxes(self, infobox_defs, tag):
+        """Save the infoboxes of ``tag`` (as returned by query_infoboxes) and their parameters."""
+        if not infobox_defs:
+            return
+
+        revision_id = infobox_defs[0]
+        for infobox in infobox_defs[1]:
+            src = infobox[0].strip(' \t\n\r')
+            # the name is captured up to the first "|", so it can end with blanks or a newline
+            name = infobox[1].strip()
+            tag_infobox, created = TagInfobox.objects.get_or_create(tag=tag, name=name, revision_id = revision_id, defaults={'source': src})
+            if not created:
+                tag_infobox.source = src
+                tag_infobox.save()
+
+            # drop the closing "}}" and the "{{Infobox <name>" opener; the parameters remain
+            keyvalues = self.split_infoboxes(START_PATTERN.sub('',src[:-2]).strip())
+            for key, value in zip(keyvalues[0::2], keyvalues[1::2]):
+                param, created = InfoboxParameter.objects.get_or_create(tag_infobox=tag_infobox, param_name=key.strip(), defaults={'param_value':value.strip()})
+                if not created:
+                    param.param_value = value.strip()
+                    param.save()
+        
+    def handle_noargs(self, **options):
+        """Query wikipedia for the categories and infoboxes of the selected tags.
+
+        Tags with a wikipedia page id are selected (unless --all is given, only
+        those with no category recorded yet), ordered, then sliced with --start
+        and --limit. Unless --force is given or the run is not interactive, a
+        confirmation is asked. Each tag is then processed in its own transaction.
+        """
+        self.style = no_style()
+
+        interactive = options.get('interactive', True)
+        verbosity = int(options.get('verbosity', '1'))
+        force = options.get('force', False)
+
+        limit = options.get("limit", -1)
+        start = options.get("start", 0)
+
+        site_url = options.get('site_url', settings.WIKIPEDIA_API_URL)
+        random = options.get('random', False)
+
+        # Build the bit mask of the requested query types; no --type means all of them.
+        types_mask = 0
+        types_list = options.get('types', [])
+        if len(types_list) == 0:
+            types_mask = TYPES_MASK_DICT['all']
+        else:
+            for t in types_list:
+                types_mask |=  TYPES_MASK_DICT[t]
+
+        if verbosity > 1 :
+            print "types mask %s " % (bin(types_mask))
+
+        if verbosity > 2:
+            print "option passed : " + repr(options)
+
+        queryset = Tag.objects.exclude(wikipedia_pageid= None)
+
+        # Unless --all is given, keep only the tags that have no category yet.
+        if not options.get('all',False):
+            queryset = queryset.annotate(wpc=Count('wp_categories')).filter(wpc = 0)
+
+        if random:
+            queryset = queryset.order_by("?")
+        else:
+            queryset = queryset.order_by("label")
+
+        # --limit is a number of tags to process, not an end index: the slice
+        # has to end at start + limit.
+        if limit >= 0:
+            queryset = queryset[start:start + limit]
+        elif start > 0:
+            queryset = queryset[start:]
+
+        if verbosity > 2 :
+            print "Tag Query is %s" % (queryset.query)
+
+        site = wiki.Wiki(site_url) #@UndefinedVariable
+
+        count = queryset.count()
+        if verbosity > 1:
+            print "Processing %d tags" % (count)
+
+        if not force and interactive:
+            confirm = raw_input("You have requested to query and replace the wikipedia information for %d tags.\n Are you sure you want to do this? \nType 'yes' to continue, or 'no' to cancel: " % (count))
+        else:
+            confirm = 'yes'
+
+        if confirm != "yes":
+            print "wikipedia query cancelled"
+            return
+
+        for i, tag in enumerate(queryset):
+            if verbosity > 1:
+                print "processing tag %s (%d/%d)" % (tag.label, i + 1, count)
+            else:
+                utils.show_progress(i + 1, count, tag.label, 60)
+
+            # For a redirection tag, use the alternative page id when there is one.
+            wikipedia_pageid = tag.wikipedia_pageid
+            if tag.url_status == Tag.TAG_URL_STATUS_DICT['redirection'] and tag.alternative_wikipedia_pageid is not None :
+                wikipedia_pageid = tag.alternative_wikipedia_pageid
+
+            # The writes made for one tag are wrapped in a single commit_on_success block.
+            with transaction.commit_on_success():
+                if types_mask & TYPES_MASK_DICT['visible']:
+                    res = self.query_all_categories(False, site, wikipedia_pageid)
+                    self.process_categories(res, False, tag)
+
+                if types_mask & TYPES_MASK_DICT['hidden']:
+                    res = self.query_all_categories(True, site, wikipedia_pageid)
+                    self.process_categories(res, True, tag)
+
+                if types_mask & TYPES_MASK_DICT['infobox']:
+                    res = self.query_infoboxes(site, wikipedia_pageid)
+                    self.process_infoboxes(res, tag)
+            
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdabo/migrations/0003_update_redirection.py	Tue Jan 17 00:18:49 2012 +0100
@@ -0,0 +1,224 @@
+# encoding: utf-8
+import datetime
+from south.db import db
+from south.v2 import DataMigration
+from django.db import models
+from hdabo import utils, wp_utils
+import sys
+from wikitools import wiki
+
+class Migration(DataMigration):
+
+    def forwards(self, orm):
+        """Fill the alternative label, url and page id of the tags with url_status 1 and no alternative label."""
+        tags = orm.Tag.objects.filter(url_status=1, alternative_label=None)
+        total = tags.count()
+
+        sys.stdout.write("Processing %d tags\n" % (total))
+        sys.stdout.flush()
+        site = wiki.Wiki("http://fr.wikipedia.org/w/api.php")
+
+        for position, tag in enumerate(tags, 1):
+            utils.show_progress(position, total, tag.label, 50)
+            wp_res = wp_utils.query_wikipedia_title(site, pageid=tag.wikipedia_pageid)
+            tag.alternative_label = wp_res['alternative_label']
+            tag.alternative_wikipedia_url = wp_res['alternative_wikipedia_url']
+            tag.alternative_wikipedia_pageid = wp_res['alternative_pageid']
+            tag.save()
+
+        sys.stdout.write("\n")
+        sys.stdout.flush()
+        
+
+    def backwards(self, orm):        
+        pass  # nothing is undone: the values written by forwards are left in place
+
+
+    models = {
+        'auth.group': {
+            'Meta': {'object_name': 'Group'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}),
+            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'})
+        },
+        'auth.permission': {
+            'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'},
+            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
+        },
+        'auth.user': {
+            'Meta': {'object_name': 'User'},
+            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
+            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
+            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
+            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
+            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
+            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
+            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
+            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}),
+            'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'})
+        },
+        'contenttypes.contenttype': {
+            'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
+            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
+        },
+        'hdabo.author': {
+            'Meta': {'object_name': 'Author'},
+            'firstname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}),
+            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'lastname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.datasheet': {
+            'Meta': {'object_name': 'Datasheet'},
+            'author': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Author']", 'null': 'True', 'blank': 'True'}),
+            'college_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_college_periods']", 'to': "orm['hdabo.TimePeriod']"}),
+            'college_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_college_themes']", 'to': "orm['hdabo.Domain']"}),
+            'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'domains': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_domains']", 'to': "orm['hdabo.Domain']"}),
+            'format': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.DocumentFormat']", 'null': 'True', 'blank': 'True'}),
+            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
+            'highschool_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_highschool_periods']", 'to': "orm['hdabo.TimePeriod']"}),
+            'highschool_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_highschool_themes']", 'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'manual_order': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
+            'modification_datetime': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
+            'organisation': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Organisation']"}),
+            'original_creation_date': ('django.db.models.fields.DateField', [], {}),
+            'original_modification_date': ('django.db.models.fields.DateField', [], {}),
+            'primary_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_primary_periods']", 'to': "orm['hdabo.TimePeriod']"}),
+            'primary_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_primary_themes']", 'to': "orm['hdabo.Domain']"}),
+            'tags': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['hdabo.Tag']", 'through': "orm['hdabo.TaggedSheet']", 'symmetrical': 'False'}),
+            'title': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+            'town': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Location']", 'null': 'True', 'blank': 'True'}),
+            'url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'validated': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
+            'validation_date': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
+            'validator': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']", 'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.datasheet_college_periods': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_periods'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
+            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
+        },
+        'hdabo.datasheet_college_themes': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_themes'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.datasheet_domains': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_domains'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.datasheet_highschool_periods': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_periods'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
+            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
+        },
+        'hdabo.datasheet_highschool_themes': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_themes'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.datasheet_primary_periods': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_periods'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
+            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
+        },
+        'hdabo.datasheet_primary_themes': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_themes'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.documentformat': {
+            'Meta': {'object_name': 'DocumentFormat'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
+        },
+        'hdabo.domain': {
+            'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'Domain'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
+            'school_period': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.location': {
+            'Meta': {'object_name': 'Location'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'insee': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '5'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '512'})
+        },
+        'hdabo.organisation': {
+            'Meta': {'object_name': 'Organisation'},
+            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'location': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
+            'website': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.tag': {
+            'Meta': {'unique_together': "(('label', 'original_label', 'url_status'),)", 'object_name': 'Tag'},
+            'alias': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'alternative_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'alternative_wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'alternative_wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagCategory']", 'null': 'True', 'blank': 'True'}),
+            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
+            'dbpedia_uri': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
+            'normalized_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
+            'original_label': ('django.db.models.fields.CharField', [], {'max_length': '1024'}),
+            'popularity': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
+            'url_status': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
+            'wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+        },
+        'hdabo.tagcategory': {
+            'Meta': {'object_name': 'TagCategory'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
+        },
+        'hdabo.taggedsheet': {
+            'Meta': {'object_name': 'TaggedSheet'},
+            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'index_note': ('django.db.models.fields.FloatField', [], {'default': '0.0', 'db_index': 'True'}),
+            'order': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
+            'original_order': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Tag']"}),
+            'wikipedia_revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.timeperiod': {
+            'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'TimePeriod'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
+            'school_period': ('django.db.models.fields.IntegerField', [], {})
+        }
+    }
+
+    complete_apps = ['hdabo']
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdabo/migrations/0004_wp_category.py	Tue Jan 17 00:18:49 2012 +0100
@@ -0,0 +1,277 @@
+# encoding: utf-8
+import datetime
+from south.db import db
+from south.v2 import SchemaMigration
+from django.db import models
+
+class Migration(SchemaMigration):
+    """Schema migration that adds the TagWpCategory, WpCategory, InfoboxParameter and TagInfobox tables."""
+    def forwards(self, orm):
+        """Create the four new hdabo tables and send a create signal for each model."""
+        # Adding model 'TagWpCategory' (link row between a Tag and a WpCategory, carrying a 'hidden' flag)
+        db.create_table('hdabo_tagwpcategory', (
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('tag', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['hdabo.Tag'])),
+            ('wp_category', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['hdabo.WpCategory'])),
+            ('hidden', self.gf('django.db.models.fields.BooleanField')(default=False)),
+        ))
+        db.send_create_signal('hdabo', ['TagWpCategory'])
+        # NOTE(review): hdabo_tagwpcategory references hdabo_wpcategory, which is only created below; presumably South defers the FK constraint - confirm.
+        # Adding model 'WpCategory' (a unique label of at most 2048 characters)
+        db.create_table('hdabo_wpcategory', (
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('label', self.gf('django.db.models.fields.CharField')(unique=True, max_length=2048)),
+        ))
+        db.send_create_signal('hdabo', ['WpCategory'])
+
+        # Adding model 'InfoboxParameter' (a name/value pair attached to a TagInfobox; the value may be NULL)
+        db.create_table('hdabo_infoboxparameter', (
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('tag_infobox', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['hdabo.TagInfobox'])),
+            ('param_name', self.gf('django.db.models.fields.CharField')(max_length=2048)),
+            ('param_value', self.gf('django.db.models.fields.TextField')(null=True, blank=True)),
+        ))
+        db.send_create_signal('hdabo', ['InfoboxParameter'])
+        # NOTE(review): hdabo_infoboxparameter references hdabo_taginfobox, which is only created below; same FK-ordering assumption as above - confirm.
+        # Adding model 'TagInfobox' (a named infobox attached to a Tag, with nullable source text and revision id)
+        db.create_table('hdabo_taginfobox', (
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('tag', self.gf('django.db.models.fields.related.ForeignKey')(related_name='infoboxes', to=orm['hdabo.Tag'])),
+            ('name', self.gf('django.db.models.fields.CharField')(max_length=2048)),
+            ('source', self.gf('django.db.models.fields.TextField')(null=True, blank=True)),
+            ('revision_id', self.gf('django.db.models.fields.BigIntegerField')(null=True, blank=True)),
+        ))
+        db.send_create_signal('hdabo', ['TagInfobox'])
+
+
+    def backwards(self, orm):
+        """Drop the four tables created by forwards(), in the same order they were created."""
+        # Deleting model 'TagWpCategory'
+        db.delete_table('hdabo_tagwpcategory')
+
+        # Deleting model 'WpCategory'
+        db.delete_table('hdabo_wpcategory')
+
+        # Deleting model 'InfoboxParameter'
+        db.delete_table('hdabo_infoboxparameter')
+
+        # Deleting model 'TagInfobox'
+        db.delete_table('hdabo_taginfobox')
+
+
+    # Model definitions keyed by 'app_label.modelname'; presumably the snapshot behind the orm[...] lookups used in forwards() - confirm against South docs.
+    models = {
+        'auth.group': {
+            'Meta': {'object_name': 'Group'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}),
+            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'})
+        },
+        'auth.permission': {
+            'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'},
+            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
+        },
+        'auth.user': {
+            'Meta': {'object_name': 'User'},
+            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
+            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
+            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
+            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
+            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
+            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
+            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
+            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}),
+            'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'})
+        },
+        'contenttypes.contenttype': {
+            'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
+            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
+        },
+        'hdabo.author': {
+            'Meta': {'object_name': 'Author'},
+            'firstname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}),
+            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'lastname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.datasheet': {
+            'Meta': {'object_name': 'Datasheet'},
+            'author': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Author']", 'null': 'True', 'blank': 'True'}),
+            'college_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_college_periods']", 'to': "orm['hdabo.TimePeriod']"}),
+            'college_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_college_themes']", 'to': "orm['hdabo.Domain']"}),
+            'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'domains': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_domains']", 'to': "orm['hdabo.Domain']"}),
+            'format': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.DocumentFormat']", 'null': 'True', 'blank': 'True'}),
+            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
+            'highschool_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_highschool_periods']", 'to': "orm['hdabo.TimePeriod']"}),
+            'highschool_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_highschool_themes']", 'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'manual_order': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
+            'modification_datetime': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
+            'organisation': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Organisation']"}),
+            'original_creation_date': ('django.db.models.fields.DateField', [], {}),
+            'original_modification_date': ('django.db.models.fields.DateField', [], {}),
+            'primary_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_primary_periods']", 'to': "orm['hdabo.TimePeriod']"}),
+            'primary_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_primary_themes']", 'to': "orm['hdabo.Domain']"}),
+            'tags': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['hdabo.Tag']", 'through': "orm['hdabo.TaggedSheet']", 'symmetrical': 'False'}),
+            'title': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+            'town': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Location']", 'null': 'True', 'blank': 'True'}),
+            'url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'validated': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
+            'validation_date': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
+            'validator': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']", 'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.datasheet_college_periods': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_periods'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
+            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
+        },
+        'hdabo.datasheet_college_themes': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_themes'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.datasheet_domains': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_domains'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.datasheet_highschool_periods': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_periods'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
+            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
+        },
+        'hdabo.datasheet_highschool_themes': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_themes'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.datasheet_primary_periods': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_periods'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
+            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
+        },
+        'hdabo.datasheet_primary_themes': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_themes'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.documentformat': {
+            'Meta': {'object_name': 'DocumentFormat'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
+        },
+        'hdabo.domain': {
+            'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'Domain'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
+            'school_period': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.infoboxparameter': {
+            'Meta': {'object_name': 'InfoboxParameter'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'param_name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+            'param_value': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'tag_infobox': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagInfobox']"})
+        },
+        'hdabo.location': {
+            'Meta': {'object_name': 'Location'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'insee': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '5'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '512'})
+        },
+        'hdabo.organisation': {
+            'Meta': {'object_name': 'Organisation'},
+            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'location': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
+            'website': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.tag': {
+            'Meta': {'unique_together': "(('label', 'original_label', 'url_status'),)", 'object_name': 'Tag'},
+            'alias': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'alternative_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'alternative_wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'alternative_wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagCategory']", 'null': 'True', 'blank': 'True'}),
+            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
+            'dbpedia_uri': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
+            'normalized_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
+            'original_label': ('django.db.models.fields.CharField', [], {'max_length': '1024'}),
+            'popularity': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
+            'url_status': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
+            'wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'wp_categories': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['hdabo.WpCategory']", 'through': "orm['hdabo.TagWpCategory']", 'symmetrical': 'False'})
+        },
+        'hdabo.tagcategory': {
+            'Meta': {'object_name': 'TagCategory'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
+        },
+        'hdabo.taggedsheet': {
+            'Meta': {'object_name': 'TaggedSheet'},
+            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'index_note': ('django.db.models.fields.FloatField', [], {'default': '0.0', 'db_index': 'True'}),
+            'order': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
+            'original_order': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Tag']"}),
+            'wikipedia_revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.taginfobox': {
+            'Meta': {'object_name': 'TagInfobox'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+            'revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'source': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'infoboxes'", 'to': "orm['hdabo.Tag']"})
+        },
+        'hdabo.tagwpcategory': {
+            'Meta': {'object_name': 'TagWpCategory'},
+            'hidden': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Tag']"}),
+            'wp_category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.WpCategory']"})
+        },
+        'hdabo.timeperiod': {
+            'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'TimePeriod'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
+            'school_period': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.wpcategory': {
+            'Meta': {'object_name': 'WpCategory'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '2048'})
+        }
+    }
+    # NOTE(review): presumably lists the apps whose models are fully described in `models` above - confirm against South docs.
+    complete_apps = ['hdabo']
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdabo/migrations/0005_wp_db_indexes.py	Tue Jan 17 00:18:49 2012 +0100
@@ -0,0 +1,248 @@
+# encoding: utf-8
+import datetime
+from south.db import db
+from south.v2 import SchemaMigration
+from django.db import models
+
+class Migration(SchemaMigration):
+
+    def forwards(self, orm):
+        
+        # Adding unique constraint on 'TagWpCategory', fields ['wp_category', 'hidden', 'tag']
+        db.create_unique('hdabo_tagwpcategory', ['wp_category_id', 'hidden', 'tag_id'])
+
+        # Adding unique constraint on 'InfoboxParameter', fields ['param_name', 'tag_infobox']
+        db.create_unique('hdabo_infoboxparameter', ['param_name', 'tag_infobox_id'])
+
+        # Adding unique constraint on 'TagInfobox', fields ['revision_id', 'tag', 'name']
+        db.create_unique('hdabo_taginfobox', ['revision_id', 'tag_id', 'name'])
+
+
+    def backwards(self, orm):
+        
+        # Removing unique constraint on 'TagInfobox', fields ['revision_id', 'tag', 'name']
+        db.delete_unique('hdabo_taginfobox', ['revision_id', 'tag_id', 'name'])
+
+        # Removing unique constraint on 'InfoboxParameter', fields ['param_name', 'tag_infobox']
+        db.delete_unique('hdabo_infoboxparameter', ['param_name', 'tag_infobox_id'])
+
+        # Removing unique constraint on 'TagWpCategory', fields ['wp_category', 'hidden', 'tag']
+        db.delete_unique('hdabo_tagwpcategory', ['wp_category_id', 'hidden', 'tag_id'])
+
+
+    models = {
+        'auth.group': {
+            'Meta': {'object_name': 'Group'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}),
+            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'})
+        },
+        'auth.permission': {
+            'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'},
+            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
+        },
+        'auth.user': {
+            'Meta': {'object_name': 'User'},
+            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
+            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
+            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
+            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
+            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
+            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
+            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
+            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}),
+            'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'})
+        },
+        'contenttypes.contenttype': {
+            'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
+            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
+        },
+        'hdabo.author': {
+            'Meta': {'object_name': 'Author'},
+            'firstname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}),
+            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'lastname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.datasheet': {
+            'Meta': {'object_name': 'Datasheet'},
+            'author': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Author']", 'null': 'True', 'blank': 'True'}),
+            'college_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_college_periods']", 'to': "orm['hdabo.TimePeriod']"}),
+            'college_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_college_themes']", 'to': "orm['hdabo.Domain']"}),
+            'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'domains': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_domains']", 'to': "orm['hdabo.Domain']"}),
+            'format': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.DocumentFormat']", 'null': 'True', 'blank': 'True'}),
+            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
+            'highschool_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_highschool_periods']", 'to': "orm['hdabo.TimePeriod']"}),
+            'highschool_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_highschool_themes']", 'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'manual_order': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
+            'modification_datetime': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
+            'organisation': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Organisation']"}),
+            'original_creation_date': ('django.db.models.fields.DateField', [], {}),
+            'original_modification_date': ('django.db.models.fields.DateField', [], {}),
+            'primary_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_primary_periods']", 'to': "orm['hdabo.TimePeriod']"}),
+            'primary_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_primary_themes']", 'to': "orm['hdabo.Domain']"}),
+            'tags': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['hdabo.Tag']", 'through': "orm['hdabo.TaggedSheet']", 'symmetrical': 'False'}),
+            'title': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+            'town': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Location']", 'null': 'True', 'blank': 'True'}),
+            'url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'validated': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
+            'validation_date': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
+            'validator': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']", 'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.datasheet_college_periods': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_periods'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
+            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
+        },
+        'hdabo.datasheet_college_themes': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_themes'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.datasheet_domains': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_domains'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.datasheet_highschool_periods': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_periods'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
+            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
+        },
+        'hdabo.datasheet_highschool_themes': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_themes'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.datasheet_primary_periods': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_periods'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
+            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
+        },
+        'hdabo.datasheet_primary_themes': {
+            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_themes'},
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sort_value': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.documentformat': {
+            'Meta': {'object_name': 'DocumentFormat'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
+        },
+        'hdabo.domain': {
+            'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'Domain'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
+            'school_period': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.infoboxparameter': {
+            'Meta': {'unique_together': "(('tag_infobox', 'param_name'),)", 'object_name': 'InfoboxParameter'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'param_name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+            'param_value': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'tag_infobox': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagInfobox']"})
+        },
+        'hdabo.location': {
+            'Meta': {'object_name': 'Location'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'insee': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '5'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '512'})
+        },
+        'hdabo.organisation': {
+            'Meta': {'object_name': 'Organisation'},
+            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'location': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
+            'website': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.tag': {
+            'Meta': {'unique_together': "(('label', 'original_label', 'url_status'),)", 'object_name': 'Tag'},
+            'alias': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'alternative_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'alternative_wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'alternative_wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagCategory']", 'null': 'True', 'blank': 'True'}),
+            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
+            'dbpedia_uri': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
+            'normalized_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
+            'original_label': ('django.db.models.fields.CharField', [], {'max_length': '1024'}),
+            'popularity': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
+            'url_status': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
+            'wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'wp_categories': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['hdabo.WpCategory']", 'through': "orm['hdabo.TagWpCategory']", 'symmetrical': 'False'})
+        },
+        'hdabo.tagcategory': {
+            'Meta': {'object_name': 'TagCategory'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
+        },
+        'hdabo.taggedsheet': {
+            'Meta': {'object_name': 'TaggedSheet'},
+            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
+            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'index_note': ('django.db.models.fields.FloatField', [], {'default': '0.0', 'db_index': 'True'}),
+            'order': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
+            'original_order': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Tag']"}),
+            'wikipedia_revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.taginfobox': {
+            'Meta': {'unique_together': "(('tag', 'name', 'revision_id'),)", 'object_name': 'TagInfobox'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+            'revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'source': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'infoboxes'", 'to': "orm['hdabo.Tag']"})
+        },
+        'hdabo.tagwpcategory': {
+            'Meta': {'unique_together': "(('tag', 'wp_category', 'hidden'),)", 'object_name': 'TagWpCategory'},
+            'hidden': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Tag']"}),
+            'wp_category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.WpCategory']"})
+        },
+        'hdabo.timeperiod': {
+            'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'TimePeriod'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
+            'school_period': ('django.db.models.fields.IntegerField', [], {})
+        },
+        'hdabo.wpcategory': {
+            'Meta': {'object_name': 'WpCategory'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '2048'})
+        }
+    }
+
+    complete_apps = ['hdabo']
--- a/web/hdabo/models.py	Mon Jan 09 03:19:43 2012 +0100
+++ b/web/hdabo/models.py	Tue Jan 17 00:18:49 2012 +0100
@@ -90,6 +90,16 @@
     class Meta:
         verbose_name_plural = "TagCategories"
 
+class WpCategory(models.Model):  # one row per distinct category label; attached to tags via Tag.wp_categories (through TagWpCategory)
+    label = models.CharField(max_length=2048, unique=True, blank=False, null=False)  # mandatory and unique; NOTE(review): presumably the Wikipedia category title -- confirm
+    
+    def __unicode__(self):  # text representation of a category is its label
+        return unicode(self.label)
+    
+    class Meta:
+        verbose_name_plural = "WpCategories"  # explicit plural verbose name for this model
+
+
 class Tag(models.Model):
     TAG_URL_STATUS_CHOICES = (
         (0, "null_result"),
@@ -121,6 +131,7 @@
     url_status = models.IntegerField(choices=TAG_URL_STATUS_CHOICES, blank=True, null=True, default=None)
     dbpedia_uri = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True)
     popularity = models.IntegerField(blank=False, null=False, default=0, db_index=True)
+    wp_categories = models.ManyToManyField(WpCategory, through='TagWpCategory')
 
     @Property
     def url_status_text(): #@NoSelf
@@ -397,4 +408,31 @@
 class Datasheet_primary_themes(SortedDatasheetLink):
     domain = models.ForeignKey(Domain, db_index=True, null=False, blank=False)
     
+        
+class TagWpCategory(models.Model):  # explicit "through" table of the Tag.wp_categories many-to-many relation
+    tag = models.ForeignKey(Tag)
+    wp_category = models.ForeignKey(WpCategory)
+    hidden = models.BooleanField(blank=False, null=False)  # NOTE(review): presumably mirrors Wikipedia's "hidden category" flag -- confirm against the importer
     
+    class Meta:
+        unique_together = ('tag', 'wp_category', 'hidden')  # 'hidden' is part of the key, so the same (tag, category) pair may exist once per flag value
+
+class TagInfobox(models.Model):  # an infobox recorded for a tag; InfoboxParameter rows point back to it
+    tag = models.ForeignKey(Tag, related_name="infoboxes")  # reverse accessor on Tag is tag.infoboxes
+    name = models.CharField(max_length=2048, unique=False, blank=False, null=False)  # mandatory, not unique on its own
+    source = models.TextField(unique=False, blank=True, null=True)  # optional; NOTE(review): presumably the raw infobox markup -- confirm
+    revision_id = models.BigIntegerField(unique=False, blank=True, null=True)  # optional; NOTE(review): presumably the Wikipedia revision id -- confirm
+    
+    class Meta:
+        unique_together = ('tag','name','revision_id')  # at most one infobox per (tag, name, revision_id) combination
+    
+class InfoboxParameter(models.Model):  # one name/value pair belonging to a TagInfobox
+    tag_infobox = models.ForeignKey(TagInfobox)  # owning infobox
+    param_name = models.CharField(max_length=2048, unique=False, blank=False, null=False)  # mandatory
+    param_value = models.TextField(unique=False, blank=True, null=True)  # optional: NULL and blank are both allowed
+
+    class Meta:
+        unique_together = ('tag_infobox','param_name')  # a parameter name appears at most once per infobox
+
+
+    
\ No newline at end of file
--- a/web/hdabo/utils.py	Mon Jan 09 03:19:43 2012 +0100
+++ b/web/hdabo/utils.py	Tue Jan 17 00:18:49 2012 +0100
@@ -1,7 +1,8 @@
 # -*- coding: utf-8 -*-
 import collections
 import unicodedata
-
+import sys
+import math
 
 ###
 # allow to declare a property as a decorator
@@ -346,3 +347,18 @@
 
 def normalize(str):
     return remove_accents(str).lower().replace(u"œ",u"oe")
+
+def show_progress(current_line, total_line, label, width):
+    """Write a ``width``-character progress bar, percentage, counters and label to stdout.
+
+    The line ends with a carriage return so the next call overwrites it; a newline is added at 100%.
+    A non-positive ``total_line`` is reported as 100% instead of raising ZeroDivisionError."""
+    percent = (float(current_line) / float(total_line)) * 100.0 if float(total_line) > 0 else 100.0
+    # Clamp the bar (not the printed figure) so an out-of-range percentage cannot overflow ``width``.
+    marks = int(math.floor(width * (min(max(percent, 0.0), 100.0) / 100.0)))
+    loader = u'[' + (u'=' * marks) + (u' ' * int(math.floor(width - marks))) + u']'
+    sys.stdout.write(u"%s %d%% %d/%d - %r\r" % (loader, percent, current_line, total_line, label[:50].rjust(50)))
+    if percent >= 100:
+        sys.stdout.write("\n")
+    sys.stdout.flush()
+
--- a/web/hdabo/wp_utils.py	Mon Jan 09 03:19:43 2012 +0100
+++ b/web/hdabo/wp_utils.py	Tue Jan 17 00:18:49 2012 +0100
@@ -26,6 +26,7 @@
             return True
     return False
 
+
 def query_wikipedia_title(site, label=None, pageid=None):
     
     params = {'action':'query', 'prop':'info|categories|langlinks', 'inprop':'url', 'lllimit':'500', 'cllimit':'500', 'rvprop':'ids'}
@@ -39,7 +40,7 @@
     response = wpquery.query()
     original_response = response
     def return_null_result():
-        return { 'new_label': None, 'status': Tag.TAG_URL_STATUS_DICT["null_result"], 'wikipedia_url': None, 'pageid': None, 'dbpedia_uri': None, 'revision_id': None, 'response': response }
+        return { 'new_label': None, 'alternative_label': None, 'status': Tag.TAG_URL_STATUS_DICT["null_result"], 'wikipedia_url': None, 'pageid': None, 'alternative_wikipedia_url': None, 'alternative_pageid': None, 'dbpedia_uri': None, 'revision_id': None, 'response': response }
     
 
     query_dict = response['query']
@@ -164,7 +165,16 @@
 def process_tag(site, tag, verbosity=0):
     
     wp_res = query_wikipedia_title(site, label=tag.label)
-    new_label, status, url, pageid, response, dbpedia_uri, revision_id = wp_res['new_label'], wp_res['status'], wp_res['wikipedia_url'], wp_res['pageid'], wp_res['response'], wp_res["dbpedia_uri"], wp_res["revision_id"]
+    new_label = wp_res['new_label']
+    alternative_label= wp_res['alternative_label']
+    status =  wp_res['status']
+    url = wp_res['wikipedia_url']
+    alternative_url = wp_res['alternative_wikipedia_url']
+    pageid = wp_res['pageid']
+    alternative_pageid = wp_res['alternative_pageid']
+    response = wp_res['response']
+    dbpedia_uri =  wp_res["dbpedia_uri"]
+    revision_id = wp_res["revision_id"]
     
     if verbosity >= 2 :
         print "response from query to %s with parameters %s :" % (site.apibase, repr(new_label))
@@ -179,6 +189,9 @@
     tag.wikipedia_url = url
     tag.wikipedia_pageid = pageid
     tag.dbpedia_uri = dbpedia_uri
+    tag.alternative_label = alternative_label
+    tag.alternative_wikipedia_url = alternative_url
+    tag.alternative_wikipedia_pageid = alternative_pageid
         
     tag.save()