Merge with c59383cc9940dc2384d58279273b4f4f13452f8d
author veltr
Mon, 30 Jan 2012 18:20:59 +0100
changeset 116 0f9cc90c49fa
parent 115 46c0f7a935d1 (current diff)
parent 114 c59383cc9940 (diff)
child 117 dc6c3ac62efa
Merge with c59383cc9940dc2384d58279273b4f4f13452f8d
web/hdabo/management/commands/query_wikipedia_category.py
web/hdabo/migrations/0004_wp_category.py
web/hdabo/migrations/0005_wp_db_indexes.py
--- a/.hgignore	Mon Jan 30 18:20:21 2012 +0100
+++ b/.hgignore	Mon Jan 30 18:20:59 2012 +0100
@@ -18,4 +18,6 @@
 syntax: regexp
 ^virtualenv/res/src/south$
 syntax: regexp
-\.sh$
\ No newline at end of file
+\.sh$
+syntax: regexp
+^web/hdalab/config\.py$
\ No newline at end of file
--- a/.settings/org.eclipse.core.resources.prefs	Mon Jan 30 18:20:21 2012 +0100
+++ b/.settings/org.eclipse.core.resources.prefs	Mon Jan 30 18:20:59 2012 +0100
@@ -1,23 +1,30 @@
-#Mon Jan 16 02:39:01 CET 2012
+#Fri Jan 27 15:11:21 CET 2012
 eclipse.preferences.version=1
 encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/haystack/backends/__init__.py=utf-8
 encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/fields.py=utf-8
 encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/forms.py=utf-8
 encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/tests.py=utf-8
 encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/whoosh/analysis.py=utf8
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/api.py=utf-8
 encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/wiki.py=utf-8
 encoding//web/hdabo/forms.py=utf-8
 encoding//web/hdabo/management/commands/import_csv.py=utf-8
 encoding//web/hdabo/management/commands/import_tag_popularity.py=utf-8
 encoding//web/hdabo/management/commands/query_wikipedia.py=utf-8
-encoding//web/hdabo/management/commands/query_wikipedia_category.py=utf-8
 encoding//web/hdabo/migrations/0001_initial.py=utf-8
 encoding//web/hdabo/migrations/0002_backport_hdabo_sf.py=utf-8
 encoding//web/hdabo/migrations/0003_update_redirection.py=utf-8
-encoding//web/hdabo/migrations/0004_wp_category.py=utf-8
 encoding//web/hdabo/models.py=utf-8
 encoding//web/hdabo/search/french_whoosh_backend.py=utf-8
 encoding//web/hdabo/tests/models.py=utf-8
 encoding//web/hdabo/utils.py=utf-8
 encoding//web/hdabo/views.py=utf-8
 encoding//web/hdabo/wp_utils.py=utf-8
+encoding//web/hdalab/__init__.py=utf-8
+encoding//web/hdalab/config.py=utf-8
+encoding//web/hdalab/manage.py=utf-8
+encoding//web/hdalab/management/commands/export_tags_csv.py=utf-8
+encoding//web/hdalab/management/commands/query_wikipedia_category.py=utf-8
+encoding//web/hdalab/migrations/0001_initial.py=utf-8
+encoding//web/hdalab/settings.py=utf-8
+encoding//web/hdalab/urls.py=utf-8
--- a/web/hdabo/management/commands/query_wikipedia_category.py	Mon Jan 30 18:20:21 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,362 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Created on Jun 7, 2011
-
-@author: ymh
-'''
-
-from django.conf import settings
-from django.core.management.base import NoArgsCommand
-from django.core.management.color import no_style
-from hdabo.models import Tag, WpCategory, TagWpCategory, TagInfobox, InfoboxParameter
-from optparse import make_option
-from wikitools import api,wiki
-import sys
-import re
-import itertools
-from hdabo import utils
-from django.db.models import Count
-from django.db import transaction
-
-
-TYPES_MASK_DICT = {
-        u'visible': 0b001,
-        u'hidden': 0b010,
-        u'infobox': 0b100,
-        u'all': 0b111,
-    }
-
-START_PATTERN = re.compile(u"\{\{\s?Infobox\s+([^|]+)", re.M|re.U|re.I)
-END_PATTERN = re.compile(u"\{\{|\}\}", re.M|re.U)
-SPLIT_PATTERN = re.compile("\s*?\|\s*([\w]+[\w \t-]*)\s*=", re.U|re.M)
-DELIMITER_PATTERN = re.compile("\{{2,3}|\}{2,3}|\[\[|\]\]|\[|\]")
-
-
-
-class Command(NoArgsCommand):
-    '''
-    query and update wikipedia for tag title.
-    '''
-    options = ''
-    help = """query and update wikipedia for tag title."""
-    
-    option_list = NoArgsCommand.option_list + (
-        make_option('--all',
-            action='store_true',
-            dest='all',
-            default=False,
-            help='force all tags to be updated, not only those not yet processed'),
-        make_option('--force',
-            action='store_true',
-            dest='force',
-            default=False,
-            help='ask no questions'),
-        make_option('--random',
-            action='store_true',
-            dest='random',
-            default=False,
-            help='randomize query on tags'),
-        make_option('--site',
-            action='store',
-            type='string',
-            dest='site_url',
-            default="http://fr.wikipedia.org/w/api.php",
-            help='the url for the wikipedia site'),
-        make_option('--limit',
-            action='store',
-            type='int',
-            dest='limit',
-            default= -1,
-            help='number of tag to process'),
-        make_option('--start',
-            action='store',
-            type='int',
-            dest='start',
-            default=0,
-            help='number of tag to ignore'),
-        make_option('--type',
-            action='append',
-            dest='types',
-            type='choice',
-            choices=['visible','hidden', 'infobox', 'all'],
-            default=[],
-            help='what type of query to oerform : visible : visible categories, hidden : hidden categories, infobox: infoboxes, all: all of them. This option can be assed multiple times'
-        ),
-        )
-    
-    
-#    def process_wp_response(self, label, response):        
-#
-#        query_dict = response['query']
-#        # get page if multiple pages or none -> return Tag.null_result
-#        pages = query_dict.get("pages", {})
-#        if len(pages) > 1 or len(pages) == 0:
-#            return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
-#        
-#        page = pages.values()[0]
-#        
-#        if u"invalid" in page or u"missing" in page:
-#            return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
-#
-#        url = page.get(u'fullurl', None)
-#        pageid = page.get(u'pageid', None)
-#        new_label = page[u'title']
-#        
-#        if self.__is_homonymie(page):
-#            status = Tag.TAG_URL_STATUS_DICT["homonyme"]
-#        elif u"redirect" in page:
-#            status = Tag.TAG_URL_STATUS_DICT["redirection"]
-#        else:
-#            status = Tag.TAG_URL_STATUS_DICT["match"]
-#        
-#        return new_label, status, url, pageid 
-
-    def query_all_categories(self, hidden, site, pageid):
-        
-        clshow = 'hidden' if hidden else '!hidden'
-        params = {'action':'query', 'pageids': pageid, 'prop':'categories', 'clshow': clshow}
-        
-        clcontinue = ""        
-        res = []
-        
-        while clcontinue is not None:
-            if clcontinue:
-                params['clcontinue'] = clcontinue
-                
-            wpquery = api.APIRequest(site, params) #@UndefinedVariable
-            response = wpquery.query()
-            
-            query_dict = response.get('query', None)
-            
-            if query_dict is None:
-                return res
-            
-            pages = query_dict.get("pages", {})
-            if len(pages) > 1 or len(pages) == 0:
-                return res
-            
-            page = pages.values()[0]
-                        
-            for cat in page.get('categories',[]):
-                title = cat.get('title',"")
-                title = title[title.find(":")+1:]
-                if title and clcontinue != ("%s|%s" % (pageid,title)):
-                    res.append(title)
-            
-            clcontinue = response.get('query-continue', {}).get('categories',{}).get('clcontinue', None)
-            
-        return res
-    
-    def process_categories(self, cat_list, hidden, tag):
-        
-        for cat in cat_list:
-            wp_cat,created = WpCategory.objects.get_or_create(label=cat) #@UnusedVariable
-            TagWpCategory.objects.get_or_create(tag=tag, wp_category=wp_cat, hidden=hidden)
-            
-                
-    def query_infoboxes(self, site, pageid):
-        
-        res = []
-        params = {'action':'query', 'pageids': pageid, 'prop':'revisions', 'rvprop': 'ids|content'}
-        wpquery = api.APIRequest(site, params) #@UndefinedVariable
-        response = wpquery.query()
-        
-        query_dict = response.get('query', None)
-            
-        if query_dict is None:
-            return res
-            
-        pages = query_dict.get("pages", {})
-        if len(pages) > 1 or len(pages) == 0:
-            return res
-
-        page = pages.values()[0]
-        
-        if 'revisions' not in page or not page['revisions']:
-            return res
-        
-        rev = page['revisions'][0]
-        
-        content = rev['*']
-                
-        start = 0
-        depth = 0
-        current_infobox_name = None
-        current_start = 0
-        
-        while start <= len(content):
-            if depth==0:
-                resm = START_PATTERN.search(content[start:])
-                if resm is None:
-                    break
-                depth = 1
-                current_start = resm.start()+start
-                start += resm.end()+1
-                current_infobox_name = resm.group(1)                    
-            else:
-                resm = END_PATTERN.search(content[start:])
-                if resm is None:
-                    break
-                if resm.group(0) == "{{":
-                    depth += 1
-                elif resm.group(0) == "}}":
-                    depth -= 1
-                if depth == 0:
-                    res.append((content[current_start:resm.end()+start], current_infobox_name))
-                start += resm.end()+1
-
-        
-        return rev['revid'],res
-    
-    def split_infoboxes(self, src):
-        
-        start = 0
-        previous_end = 0
-        split_indexes = []
-        delimiter_stack = []
-        while start<=len(src):            
-            resd = DELIMITER_PATTERN.search(src[start:])
-            ress = SPLIT_PATTERN.search(src[start:]) if len(delimiter_stack) == 0 else None
-            startd = resd.start() if resd is not None else sys.maxint
-            starts = ress.start() if ress is not None else sys.maxint
-            if starts < startd:
-                if len(split_indexes)>0:
-                    split_indexes.append((previous_end, ress.start(0)+start))
-                split_indexes.append((ress.start(1)+start, ress.end(1)+start))
-                start += ress.end(0)
-                previous_end = start
-            elif startd < sys.maxint:
-                if resd.group().startswith("{") or resd.group().startswith("[") :
-                    delimiter_stack.append(resd.group())
-                elif len(delimiter_stack)>0 and ( (delimiter_stack[-1].startswith('{') and resd.group()[0] == '}') or (delimiter_stack[-1].startswith('[') and resd.group()[0] == ']') ) and len(delimiter_stack[-1]) == len(resd.group()):
-                    delimiter_stack.pop()
-                start += resd.end()
-            else:
-                break
-            
-        if previous_end > 0:
-            split_indexes.append((previous_end,len(src)))
-        res = [src[start:end] for start,end in split_indexes]
-        return res
-
-
-
-    def process_infoboxes(self, infobox_defs, tag):
-        
-        if not infobox_defs:
-            return
-        
-        revision_id = infobox_defs[0]
-        for infobox in infobox_defs[1]:
-            src = infobox[0].strip(' \t\n\r')            
-            name = infobox[1]
-            tag_infobox, created = TagInfobox.objects.get_or_create(tag=tag, name=name, revision_id = revision_id, defaults={'source': src})
-            if not created:
-                tag_infobox.source = src
-                tag_infobox.save()
-
-            src = START_PATTERN.sub('',src[:-2]).strip()
-            keyvalues = self.split_infoboxes(src)
-
-            for key,value in itertools.izip(*[itertools.islice(keyvalues, i, None, 2) for i in range(2)]):
-                param, created = InfoboxParameter.objects.get_or_create(tag_infobox=tag_infobox, param_name=key.strip(), defaults={'param_value':value.strip()})
-                if not created:
-                    param.param_value = value.strip()
-                    param.save()
-        
-    def handle_noargs(self, **options):
-        
-        self.style = no_style()
-        
-        interactive = options.get('interactive', True)
-        
-        verbosity = int(options.get('verbosity', '1'))
-        
-        force = options.get('force', False)
-        
-        limit = options.get("limit", -1)
-        start = options.get("start", 0)
-        
-        site_url = options.get('site_url', settings.WIKIPEDIA_API_URL)
-        
-        random = options.get('random', False)
-        
-        types_mask = 0
-        types_list = options.get('types', [])
-        
-        if len(types_list) == 0:
-            types_mask = TYPES_MASK_DICT['all']
-        else:
-            for t in types_list:
-                types_mask |=  TYPES_MASK_DICT[t]
-                
-        if verbosity > 1 :
-            print "types mask %s " % (bin(types_mask))  
-        
-        if verbosity > 2:
-            print "option passed : " + repr(options)
-
-
-        queryset = Tag.objects.exclude(wikipedia_pageid= None)
-        
-        if not options.get('all',False):
-            queryset = queryset.annotate(wpc=Count('wp_categories')).filter(wpc = 0)
-        #else:
-        #    queryset = Tag.objects.filter(url_status=None)                    
-        
-        if random:
-            queryset = queryset.order_by("?")
-        else:
-            queryset = queryset.order_by("label")
-        
-        if limit >= 0:
-            queryset = queryset[start:limit]
-        elif start > 0:
-            queryset = queryset[start:]            
-        
-        if verbosity > 2 :
-            print "Tag Query is %s" % (queryset.query)
-        
-        site = wiki.Wiki(site_url) #@UndefinedVariable
-        
-        
-        count = queryset.count()
-        if verbosity > 1:
-            print "Processing %d tags" % (count)
-        
-        if not force and interactive:
-            confirm = raw_input("You have requested to query and replace the wikipedia information for %d tags.\n Are you sure you want to do this? \nType 'yes' to continue, or 'no' to cancel: " % (count))
-        else:
-            confirm = 'yes'
-            
-        if confirm != "yes":
-            print "wikipedia query cancelled"
-            return
-
-        
-        
-        for i, tag in enumerate(queryset):
-            
-            if verbosity > 1:
-                print "processing tag %s (%d/%d)" % (tag.label, i + 1, count)
-            else:
-                utils.show_progress(i + 1, count, tag.label, 60)                            
-
-            # query categories
-            wikipedia_pageid = tag.wikipedia_pageid
-            if tag.url_status == Tag.TAG_URL_STATUS_DICT['redirection'] and tag.alternative_wikipedia_pageid is not None :
-                wikipedia_pageid = tag.alternative_wikipedia_pageid
-
-            with transaction.commit_on_success():
-                if types_mask & TYPES_MASK_DICT['visible']:
-                    res = self.query_all_categories(False, site, wikipedia_pageid)
-                    self.process_categories(res, False, tag)
-    
-                if types_mask & TYPES_MASK_DICT['hidden']:
-                    res = self.query_all_categories(True, site, wikipedia_pageid)
-                    self.process_categories(res, True, tag)
-                
-                if types_mask & TYPES_MASK_DICT['infobox']:
-                    res = self.query_infoboxes(site, wikipedia_pageid)
-                    self.process_infoboxes(res, tag)
-            
--- a/web/hdabo/migrations/0003_update_redirection.py	Mon Jan 30 18:20:21 2012 +0100
+++ b/web/hdabo/migrations/0003_update_redirection.py	Mon Jan 30 18:20:59 2012 +0100
@@ -1,8 +1,5 @@
 # encoding: utf-8
-import datetime
-from south.db import db
 from south.v2 import DataMigration
-from django.db import models
 from hdabo import utils, wp_utils
 import sys
 from wikitools import wiki
@@ -15,7 +12,7 @@
         
         sys.stdout.write("Processing %d tags\n" % (queryset_count))
         sys.stdout.flush()
-        site = wiki.Wiki("http://fr.wikipedia.org/w/api.php")
+        site = wiki.Wiki("http://fr.wikipedia.org/w/api.php") #@UndefinedVariable
         
         
         for i,tag in enumerate(queryset):
--- a/web/hdabo/migrations/0004_wp_category.py	Mon Jan 30 18:20:21 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,277 +0,0 @@
-# encoding: utf-8
-import datetime
-from south.db import db
-from south.v2 import SchemaMigration
-from django.db import models
-
-class Migration(SchemaMigration):
-
-    def forwards(self, orm):
-        
-        # Adding model 'TagWpCategory'
-        db.create_table('hdabo_tagwpcategory', (
-            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
-            ('tag', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['hdabo.Tag'])),
-            ('wp_category', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['hdabo.WpCategory'])),
-            ('hidden', self.gf('django.db.models.fields.BooleanField')(default=False)),
-        ))
-        db.send_create_signal('hdabo', ['TagWpCategory'])
-
-        # Adding model 'WpCategory'
-        db.create_table('hdabo_wpcategory', (
-            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
-            ('label', self.gf('django.db.models.fields.CharField')(unique=True, max_length=2048)),
-        ))
-        db.send_create_signal('hdabo', ['WpCategory'])
-
-        # Adding model 'InfoboxParameter'
-        db.create_table('hdabo_infoboxparameter', (
-            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
-            ('tag_infobox', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['hdabo.TagInfobox'])),
-            ('param_name', self.gf('django.db.models.fields.CharField')(max_length=2048)),
-            ('param_value', self.gf('django.db.models.fields.TextField')(null=True, blank=True)),
-        ))
-        db.send_create_signal('hdabo', ['InfoboxParameter'])
-
-        # Adding model 'TagInfobox'
-        db.create_table('hdabo_taginfobox', (
-            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
-            ('tag', self.gf('django.db.models.fields.related.ForeignKey')(related_name='infoboxes', to=orm['hdabo.Tag'])),
-            ('name', self.gf('django.db.models.fields.CharField')(max_length=2048)),
-            ('source', self.gf('django.db.models.fields.TextField')(null=True, blank=True)),
-            ('revision_id', self.gf('django.db.models.fields.BigIntegerField')(null=True, blank=True)),
-        ))
-        db.send_create_signal('hdabo', ['TagInfobox'])
-
-
-    def backwards(self, orm):
-        
-        # Deleting model 'TagWpCategory'
-        db.delete_table('hdabo_tagwpcategory')
-
-        # Deleting model 'WpCategory'
-        db.delete_table('hdabo_wpcategory')
-
-        # Deleting model 'InfoboxParameter'
-        db.delete_table('hdabo_infoboxparameter')
-
-        # Deleting model 'TagInfobox'
-        db.delete_table('hdabo_taginfobox')
-
-
-    models = {
-        'auth.group': {
-            'Meta': {'object_name': 'Group'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}),
-            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'})
-        },
-        'auth.permission': {
-            'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'},
-            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
-            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
-        },
-        'auth.user': {
-            'Meta': {'object_name': 'User'},
-            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
-            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
-            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
-            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
-            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
-            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
-            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
-            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
-            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
-            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}),
-            'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'})
-        },
-        'contenttypes.contenttype': {
-            'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
-            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
-            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
-        },
-        'hdabo.author': {
-            'Meta': {'object_name': 'Author'},
-            'firstname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}),
-            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'lastname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'})
-        },
-        'hdabo.datasheet': {
-            'Meta': {'object_name': 'Datasheet'},
-            'author': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Author']", 'null': 'True', 'blank': 'True'}),
-            'college_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_college_periods']", 'to': "orm['hdabo.TimePeriod']"}),
-            'college_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_college_themes']", 'to': "orm['hdabo.Domain']"}),
-            'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
-            'domains': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_domains']", 'to': "orm['hdabo.Domain']"}),
-            'format': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.DocumentFormat']", 'null': 'True', 'blank': 'True'}),
-            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
-            'highschool_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_highschool_periods']", 'to': "orm['hdabo.TimePeriod']"}),
-            'highschool_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_highschool_themes']", 'to': "orm['hdabo.Domain']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'manual_order': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
-            'modification_datetime': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
-            'organisation': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Organisation']"}),
-            'original_creation_date': ('django.db.models.fields.DateField', [], {}),
-            'original_modification_date': ('django.db.models.fields.DateField', [], {}),
-            'primary_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_primary_periods']", 'to': "orm['hdabo.TimePeriod']"}),
-            'primary_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_primary_themes']", 'to': "orm['hdabo.Domain']"}),
-            'tags': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['hdabo.Tag']", 'through': "orm['hdabo.TaggedSheet']", 'symmetrical': 'False'}),
-            'title': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
-            'town': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Location']", 'null': 'True', 'blank': 'True'}),
-            'url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
-            'validated': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
-            'validation_date': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
-            'validator': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']", 'null': 'True', 'blank': 'True'})
-        },
-        'hdabo.datasheet_college_periods': {
-            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_periods'},
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
-            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
-        },
-        'hdabo.datasheet_college_themes': {
-            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_themes'},
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'sort_value': ('django.db.models.fields.IntegerField', [], {})
-        },
-        'hdabo.datasheet_domains': {
-            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_domains'},
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'sort_value': ('django.db.models.fields.IntegerField', [], {})
-        },
-        'hdabo.datasheet_highschool_periods': {
-            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_periods'},
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
-            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
-        },
-        'hdabo.datasheet_highschool_themes': {
-            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_themes'},
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'sort_value': ('django.db.models.fields.IntegerField', [], {})
-        },
-        'hdabo.datasheet_primary_periods': {
-            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_periods'},
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
-            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
-        },
-        'hdabo.datasheet_primary_themes': {
-            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_themes'},
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'sort_value': ('django.db.models.fields.IntegerField', [], {})
-        },
-        'hdabo.documentformat': {
-            'Meta': {'object_name': 'DocumentFormat'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
-        },
-        'hdabo.domain': {
-            'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'Domain'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
-            'school_period': ('django.db.models.fields.IntegerField', [], {})
-        },
-        'hdabo.infoboxparameter': {
-            'Meta': {'object_name': 'InfoboxParameter'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'param_name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
-            'param_value': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
-            'tag_infobox': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagInfobox']"})
-        },
-        'hdabo.location': {
-            'Meta': {'object_name': 'Location'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'insee': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '5'}),
-            'name': ('django.db.models.fields.CharField', [], {'max_length': '512'})
-        },
-        'hdabo.organisation': {
-            'Meta': {'object_name': 'Organisation'},
-            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'location': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}),
-            'name': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
-            'website': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'})
-        },
-        'hdabo.tag': {
-            'Meta': {'unique_together': "(('label', 'original_label', 'url_status'),)", 'object_name': 'Tag'},
-            'alias': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
-            'alternative_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
-            'alternative_wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
-            'alternative_wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
-            'category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagCategory']", 'null': 'True', 'blank': 'True'}),
-            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
-            'dbpedia_uri': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
-            'normalized_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
-            'original_label': ('django.db.models.fields.CharField', [], {'max_length': '1024'}),
-            'popularity': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
-            'url_status': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
-            'wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
-            'wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
-            'wp_categories': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['hdabo.WpCategory']", 'through': "orm['hdabo.TagWpCategory']", 'symmetrical': 'False'})
-        },
-        'hdabo.tagcategory': {
-            'Meta': {'object_name': 'TagCategory'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
-        },
-        'hdabo.taggedsheet': {
-            'Meta': {'object_name': 'TaggedSheet'},
-            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'index_note': ('django.db.models.fields.FloatField', [], {'default': '0.0', 'db_index': 'True'}),
-            'order': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
-            'original_order': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
-            'tag': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Tag']"}),
-            'wikipedia_revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'})
-        },
-        'hdabo.taginfobox': {
-            'Meta': {'object_name': 'TagInfobox'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
-            'revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
-            'source': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
-            'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'infoboxes'", 'to': "orm['hdabo.Tag']"})
-        },
-        'hdabo.tagwpcategory': {
-            'Meta': {'object_name': 'TagWpCategory'},
-            'hidden': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'tag': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Tag']"}),
-            'wp_category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.WpCategory']"})
-        },
-        'hdabo.timeperiod': {
-            'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'TimePeriod'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
-            'school_period': ('django.db.models.fields.IntegerField', [], {})
-        },
-        'hdabo.wpcategory': {
-            'Meta': {'object_name': 'WpCategory'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '2048'})
-        }
-    }
-
-    complete_apps = ['hdabo']
--- a/web/hdabo/migrations/0005_wp_db_indexes.py	Mon Jan 30 18:20:21 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,248 +0,0 @@
-# encoding: utf-8
-import datetime
-from south.db import db
-from south.v2 import SchemaMigration
-from django.db import models
-
-class Migration(SchemaMigration):
-
-    def forwards(self, orm):
-        
-        # Adding unique constraint on 'TagWpCategory', fields ['wp_category', 'hidden', 'tag']
-        db.create_unique('hdabo_tagwpcategory', ['wp_category_id', 'hidden', 'tag_id'])
-
-        # Adding unique constraint on 'InfoboxParameter', fields ['param_name', 'tag_infobox']
-        db.create_unique('hdabo_infoboxparameter', ['param_name', 'tag_infobox_id'])
-
-        # Adding unique constraint on 'TagInfobox', fields ['revision_id', 'tag', 'name']
-        db.create_unique('hdabo_taginfobox', ['revision_id', 'tag_id', 'name'])
-
-
-    def backwards(self, orm):
-        
-        # Removing unique constraint on 'TagInfobox', fields ['revision_id', 'tag', 'name']
-        db.delete_unique('hdabo_taginfobox', ['revision_id', 'tag_id', 'name'])
-
-        # Removing unique constraint on 'InfoboxParameter', fields ['param_name', 'tag_infobox']
-        db.delete_unique('hdabo_infoboxparameter', ['param_name', 'tag_infobox_id'])
-
-        # Removing unique constraint on 'TagWpCategory', fields ['wp_category', 'hidden', 'tag']
-        db.delete_unique('hdabo_tagwpcategory', ['wp_category_id', 'hidden', 'tag_id'])
-
-
-    models = {
-        'auth.group': {
-            'Meta': {'object_name': 'Group'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}),
-            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'})
-        },
-        'auth.permission': {
-            'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'},
-            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
-            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
-        },
-        'auth.user': {
-            'Meta': {'object_name': 'User'},
-            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
-            'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}),
-            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
-            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
-            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
-            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
-            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
-            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
-            'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}),
-            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}),
-            'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'})
-        },
-        'contenttypes.contenttype': {
-            'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
-            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
-            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
-        },
-        'hdabo.author': {
-            'Meta': {'object_name': 'Author'},
-            'firstname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}),
-            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'lastname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'})
-        },
-        'hdabo.datasheet': {
-            'Meta': {'object_name': 'Datasheet'},
-            'author': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Author']", 'null': 'True', 'blank': 'True'}),
-            'college_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_college_periods']", 'to': "orm['hdabo.TimePeriod']"}),
-            'college_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_college_themes']", 'to': "orm['hdabo.Domain']"}),
-            'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
-            'domains': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_domains']", 'to': "orm['hdabo.Domain']"}),
-            'format': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.DocumentFormat']", 'null': 'True', 'blank': 'True'}),
-            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
-            'highschool_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_highschool_periods']", 'to': "orm['hdabo.TimePeriod']"}),
-            'highschool_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_highschool_themes']", 'to': "orm['hdabo.Domain']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'manual_order': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
-            'modification_datetime': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
-            'organisation': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Organisation']"}),
-            'original_creation_date': ('django.db.models.fields.DateField', [], {}),
-            'original_modification_date': ('django.db.models.fields.DateField', [], {}),
-            'primary_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_primary_periods']", 'to': "orm['hdabo.TimePeriod']"}),
-            'primary_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_primary_themes']", 'to': "orm['hdabo.Domain']"}),
-            'tags': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['hdabo.Tag']", 'through': "orm['hdabo.TaggedSheet']", 'symmetrical': 'False'}),
-            'title': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
-            'town': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Location']", 'null': 'True', 'blank': 'True'}),
-            'url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
-            'validated': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
-            'validation_date': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}),
-            'validator': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']", 'null': 'True', 'blank': 'True'})
-        },
-        'hdabo.datasheet_college_periods': {
-            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_periods'},
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
-            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
-        },
-        'hdabo.datasheet_college_themes': {
-            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_themes'},
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'sort_value': ('django.db.models.fields.IntegerField', [], {})
-        },
-        'hdabo.datasheet_domains': {
-            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_domains'},
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'sort_value': ('django.db.models.fields.IntegerField', [], {})
-        },
-        'hdabo.datasheet_highschool_periods': {
-            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_periods'},
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
-            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
-        },
-        'hdabo.datasheet_highschool_themes': {
-            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_themes'},
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'sort_value': ('django.db.models.fields.IntegerField', [], {})
-        },
-        'hdabo.datasheet_primary_periods': {
-            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_periods'},
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'sort_value': ('django.db.models.fields.IntegerField', [], {}),
-            'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"})
-        },
-        'hdabo.datasheet_primary_themes': {
-            'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_themes'},
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'sort_value': ('django.db.models.fields.IntegerField', [], {})
-        },
-        'hdabo.documentformat': {
-            'Meta': {'object_name': 'DocumentFormat'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
-        },
-        'hdabo.domain': {
-            'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'Domain'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
-            'school_period': ('django.db.models.fields.IntegerField', [], {})
-        },
-        'hdabo.infoboxparameter': {
-            'Meta': {'unique_together': "(('tag_infobox', 'param_name'),)", 'object_name': 'InfoboxParameter'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'param_name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
-            'param_value': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
-            'tag_infobox': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagInfobox']"})
-        },
-        'hdabo.location': {
-            'Meta': {'object_name': 'Location'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'insee': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '5'}),
-            'name': ('django.db.models.fields.CharField', [], {'max_length': '512'})
-        },
-        'hdabo.organisation': {
-            'Meta': {'object_name': 'Organisation'},
-            'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'location': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}),
-            'name': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
-            'website': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'})
-        },
-        'hdabo.tag': {
-            'Meta': {'unique_together': "(('label', 'original_label', 'url_status'),)", 'object_name': 'Tag'},
-            'alias': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
-            'alternative_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
-            'alternative_wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
-            'alternative_wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
-            'category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagCategory']", 'null': 'True', 'blank': 'True'}),
-            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
-            'dbpedia_uri': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
-            'normalized_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
-            'original_label': ('django.db.models.fields.CharField', [], {'max_length': '1024'}),
-            'popularity': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
-            'url_status': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
-            'wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
-            'wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
-            'wp_categories': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['hdabo.WpCategory']", 'through': "orm['hdabo.TagWpCategory']", 'symmetrical': 'False'})
-        },
-        'hdabo.tagcategory': {
-            'Meta': {'object_name': 'TagCategory'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
-        },
-        'hdabo.taggedsheet': {
-            'Meta': {'object_name': 'TaggedSheet'},
-            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
-            'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'index_note': ('django.db.models.fields.FloatField', [], {'default': '0.0', 'db_index': 'True'}),
-            'order': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
-            'original_order': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
-            'tag': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Tag']"}),
-            'wikipedia_revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'})
-        },
-        'hdabo.taginfobox': {
-            'Meta': {'unique_together': "(('tag', 'name', 'revision_id'),)", 'object_name': 'TagInfobox'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
-            'revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
-            'source': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
-            'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'infoboxes'", 'to': "orm['hdabo.Tag']"})
-        },
-        'hdabo.tagwpcategory': {
-            'Meta': {'unique_together': "(('tag', 'wp_category', 'hidden'),)", 'object_name': 'TagWpCategory'},
-            'hidden': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'tag': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Tag']"}),
-            'wp_category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.WpCategory']"})
-        },
-        'hdabo.timeperiod': {
-            'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'TimePeriod'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
-            'school_period': ('django.db.models.fields.IntegerField', [], {})
-        },
-        'hdabo.wpcategory': {
-            'Meta': {'object_name': 'WpCategory'},
-            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
-            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '2048'})
-        }
-    }
-
-    complete_apps = ['hdabo']
--- a/web/hdabo/models.py	Mon Jan 30 18:20:21 2012 +0100
+++ b/web/hdabo/models.py	Mon Jan 30 18:20:59 2012 +0100
@@ -90,16 +90,6 @@
     class Meta:
         verbose_name_plural = "TagCategories"
 
-class WpCategory(models.Model):
-    label = models.CharField(max_length=2048, unique=True, blank=False, null=False)
-    
-    def __unicode__(self):
-        return unicode(self.label)
-    
-    class Meta:
-        verbose_name_plural = "WpCategories"
-
-
 class Tag(models.Model):
     TAG_URL_STATUS_CHOICES = (
         (0, "null_result"),
@@ -131,7 +121,6 @@
     url_status = models.IntegerField(choices=TAG_URL_STATUS_CHOICES, blank=True, null=True, default=None)
     dbpedia_uri = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True)
     popularity = models.IntegerField(blank=False, null=False, default=0, db_index=True)
-    wp_categories = models.ManyToManyField(WpCategory, through='TagWpCategory')
 
     @Property
     def url_status_text(): #@NoSelf
@@ -409,30 +398,5 @@
     domain = models.ForeignKey(Domain, db_index=True, null=False, blank=False)
     
         
-class TagWpCategory(models.Model):
-    tag = models.ForeignKey(Tag)
-    wp_category = models.ForeignKey(WpCategory)
-    hidden = models.BooleanField(blank=False, null=False)
-    
-    class Meta:
-        unique_together = ('tag', 'wp_category', 'hidden')
-
-class TagInfobox(models.Model):
-    tag = models.ForeignKey(Tag, related_name="infoboxes")
-    name = models.CharField(max_length=2048, unique=False, blank=False, null=False)
-    source = models.TextField(unique=False, blank=True, null=True)
-    revision_id = models.BigIntegerField(unique=False, blank=True, null=True)
-    
-    class Meta:
-        unique_together = ('tag','name','revision_id')
-    
-class InfoboxParameter(models.Model):
-    tag_infobox = models.ForeignKey(TagInfobox)
-    param_name = models.CharField(max_length=2048, unique=False, blank=False, null=False)
-    param_value = models.TextField(unique=False, blank=True, null=True)
-
-    class Meta:
-        unique_together = ('tag_infobox','param_name')
-
 
     
\ No newline at end of file
--- a/web/hdabo/utils.py	Mon Jan 30 18:20:21 2012 +0100
+++ b/web/hdabo/utils.py	Mon Jan 30 18:20:59 2012 +0100
@@ -3,6 +3,7 @@
 import unicodedata
 import sys
 import math
+import codecs
 
 ###
 # allow to declare a property as a decorator
@@ -348,7 +349,12 @@
 def normalize(str):
     return remove_accents(str).lower().replace(u"œ",u"oe")
 
-def show_progress(current_line, total_line, label, width):
+def show_progress(current_line, total_line, label, width, writer=None):
+
+    if writer is None:
+        writer = sys.stdout
+        if sys.stdout.encoding is not None:
+            writer = codecs.getwriter(sys.stdout.encoding)(sys.stdout)
 
     percent = (float(current_line) / float(total_line)) * 100.0
 
@@ -356,9 +362,13 @@
     spaces = math.floor(width - marks)
 
     loader = u'[' + (u'=' * int(marks)) + (u' ' * int(spaces)) + u']'
+        
+    s = u"%s %3d%% %*d/%d - %*s\r" % (loader, percent, len(str(total_line)), current_line, total_line, width, label[:width])
     
-    sys.stdout.write(u"%s %d%% %d/%d - %r\r" % (loader, percent, current_line, total_line, label[:50].rjust(50))) #takes the header into account
+    writer.write(s) #takes the header into account
     if percent >= 100:
-        sys.stdout.write("\n")
-    sys.stdout.flush()
+        writer.write("\n")
+    writer.flush()
+    
+    return writer
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/__init__.py	Mon Jan 30 18:20:59 2012 +0100
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+VERSION = (0, 1, 0, "final", 0)
+
+
+def get_version():  # Build a display string from the module-level VERSION tuple and return it.
+    version = '%s.%s' % (VERSION[0], VERSION[1])  # always starts as "<VERSION[0]>.<VERSION[1]>"
+    if VERSION[2]:  # third component is appended only when it is truthy (e.g. non-zero)
+        version = '%s.%s' % (version, VERSION[2])
+    if VERSION[3:] == ('alpha', 0):  # exactly ('alpha', 0) in the last two slots -> "pre-alpha" suffix
+        version = '%s pre-alpha' % version
+    else:
+        if VERSION[3] != 'final':  # 'final' gets no suffix at all
+            version = '%s %s %s' % (version, VERSION[3], VERSION[4])  # e.g. (0, 1, 0, 'beta', 2) -> "0.1 beta 2"
+    return version
+
+
+__version__ = get_version()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/config.py.tmpl	Mon Jan 30 18:20:59 2012 +0100
@@ -0,0 +1,57 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jan 26, 2012
+
+@author: ymh
+'''
+import os
+
+DEBUG = True
+TEMPLATE_DEBUG = DEBUG
+
+ADMINS = (
+    # ('Your Name', 'your_email@example.com'),
+)
+
+MANAGERS = ADMINS
+
+DATABASES = {
+    'default': {
+        'ENGINE': 'django.db.backends.', # Add 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'.
+        'NAME': '',                      # Or path to database file if using sqlite3.
+        'USER': '',                      # Not used with sqlite3.
+        'PASSWORD': '',                  # Not used with sqlite3.
+        'HOST': '',                      # Set to empty string for localhost. Not used with sqlite3.
+        'PORT': '',                      # Set to empty string for default. Not used with sqlite3.
+    }
+}
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__)).rstrip("/") + "/"  # absolute directory of this file; trailing "/" stripped then re-added so exactly one remains
+BASE_URL = '/~ymh/hdabo/'  # URL path prefix; MEDIA_URL and STATIC_URL below are built on it. NOTE(review): looks like a developer-specific sample value — confirm per deployment
+WEB_URL = 'http://localhost'
+
+
+# Absolute filesystem path to the directory that will hold user-uploaded files.
+# Example: "/home/media/media.lawrence.com/media/"
+MEDIA_ROOT = os.path.abspath(BASE_DIR + "../static/media/")
+
+# URL that handles the media served from MEDIA_ROOT. Make sure to use a
+# trailing slash.
+# Examples: "http://media.lawrence.com/media/", "http://example.com/media/"
+MEDIA_URL = BASE_URL + "static/media/"
+
+# Absolute path to the directory static files should be collected to.
+# Don't put anything in this directory yourself; store your static files
+# in apps' "static/" subdirectories and in STATICFILES_DIRS.
+# Example: "/home/media/media.lawrence.com/static/"
+STATIC_ROOT = os.path.abspath(BASE_DIR + "../static/site/")
+
+# URL prefix for static files.
+# Example: "http://media.lawrence.com/static/"
+STATIC_URL = BASE_URL + "static/site/"
+
+# URL prefix for admin static files -- CSS, JavaScript and images.
+# Make sure to use a trailing slash.
+# Examples: "http://foo.com/static/admin/", "/static/admin/".
+ADMIN_MEDIA_PREFIX = STATIC_URL + 'admin/'
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/manage.py	Mon Jan 30 18:20:59 2012 +0100
@@ -0,0 +1,15 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Command line entry point of the hdalab Django project."""
+from django.core.management import execute_manager
+import imp
+import sys
+
+
+def _ensure_settings_available():
+    """Exit with status 1 when no 'settings' module can be located."""
+    try:
+        imp.find_module('settings') # Assumed to be in the same directory.
+        return
+    except ImportError:
+        pass
+    message = (
+        "Error: Can't find the file 'settings.py' in the directory containing %r. "
+        "It appears you've customized things.\n"
+        "You'll have to run django-admin.py, passing it your settings module.\n"
+    )
+    sys.stderr.write(message % __file__)
+    sys.exit(1)
+
+
+_ensure_settings_available()
+
+import settings
+
+if __name__ == "__main__":
+    execute_manager(settings)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/management/commands/export_tags_csv.py	Mon Jan 30 18:20:59 2012 +0100
@@ -0,0 +1,158 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jan 25, 2012
+
+@author: ymh
+'''
+
+from django.core.management.base import BaseCommand, CommandError
+from django.db.models import Q
+from hdabo import utils
+from hdabo.models import Tag
+from optparse import make_option
+import csv
+import cStringIO
+import codecs
+
+class UnicodeWriter:
+    """
+    CSV writer taking rows of unicode strings and writing them to the
+    file-like object "f", encoded in the given encoding.
+
+    Each row is first rendered as UTF-8 by the csv module into an in-memory
+    buffer, then transcoded to the target encoding and written to "f".
+    """
+
+    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
+        self.stream = f
+        self.encoder = codecs.getincrementalencoder(encoding)()
+        # Scratch buffer receiving the UTF-8 output of the csv module
+        self.queue = cStringIO.StringIO()
+        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
+
+    def writerow(self, row):
+        """Write one row; every cell must provide an encode() method."""
+        utf8_cells = []
+        for cell in row:
+            utf8_cells.append(cell.encode("utf-8"))
+        self.writer.writerow(utf8_cells)
+        # Take the UTF-8 line out of the buffer and transcode it
+        utf8_line = self.queue.getvalue()
+        self.stream.write(self.encoder.encode(utf8_line.decode("utf-8")))
+        # Reset the buffer for the next row
+        self.queue.truncate(0)
+
+    def writerows(self, rows):
+        """Write every row of "rows" with writerow()."""
+        for single_row in rows:
+            self.writerow(single_row)
+
+class Command(BaseCommand):
+    '''
+    Command exporting tags with their wikipedia categories and infoboxes.
+
+    Three csv files are written, their names being derived from the base path
+    given as first argument: <base>_visible.txt (visible categories),
+    <base>_hidden.txt (hidden categories) and <base>_infobox.txt (infobox
+    parameters). Only tags having a wikipedia page id are exported.
+    '''
+    args = '<path_to_name_base_csv_file>'
+    options = '[-c|--category filter by category name] [-e|--encoding csv file encoding]'
+    help = """export csv files for hdabo
+Options:
+    -c, --category : filter by category (can be given several times)
+    -e, --encoding : files encoding. default to utf-8
+    -f, --force : overwrite existing export files without asking"""
+
+    option_list = BaseCommand.option_list + (
+        make_option("-c","--category",
+            action='append',
+            type='string',
+            dest='categories',
+            default=[],
+            help='filter tag by category (or)'),
+        make_option("-e","--encoding",
+            action='store',
+            type='string',
+            dest='encoding',
+            default="utf-8",
+            help='file encoding, default utf-8'),
+        make_option("-f","--force",
+            action='store_true',
+            dest='force',
+            default=False,
+            help='force file overwrite'),
+        )
+
+    def handle(self, *args, **options):
+        '''
+        Export the selected tags to the three csv files.
+
+        args[0] is the base path of the export files. Raises CommandError
+        when it is missing. Returns the string "error" when an export file
+        already exists and overwriting it was not confirmed.
+        '''
+
+        if len(args) == 0 or not args[0]:
+            raise CommandError("Give at least one csv file to export")
+
+        self.encoding = options.get("encoding", "utf-8")
+        self.categories = options.get("categories", [])
+        self.force = options.get("force", False)
+        self.base_path = args[0].strip()
+        self.interactive = options.get("interactive",True)
+
+        files_path = {
+            "visible" : { 'path':self.base_path + "_visible.txt",},
+            "hidden" : { 'path':self.base_path + "_hidden.txt",},
+            "infobox" : { 'path':self.base_path + "_infobox.txt",},
+        }
+
+        try:
+            for filedef in files_path.values():
+                try:
+                    # Opening for reading is only an existence test: an
+                    # IOError means there is nothing to overwrite.
+                    filedef['file'] = open(filedef['path'],'r')
+                    if (not self.force) and self.interactive:
+                        print filedef['path']
+                        resp = raw_input("export file already exists. override ? type yes to continue : ")
+                        if resp.lower() in ("yes", "y"):
+                            # the confirmation also applies to the remaining files
+                            self.force = True
+                        else:
+                            return "error"
+                    elif not self.interactive and not self.force:
+                        print "Export file %s already exists. Exit." % (filedef['path'])
+                        return "error"
+
+                    filedef['file'].close()
+                    # binary mode: the csv module writes its own line terminators
+                    filedef['file'] = open(filedef['path'],'wb')
+                except IOError:
+                    filedef['file'] = open(filedef['path'],'wb')
+
+                filedef['csv'] = UnicodeWriter(filedef['file'], doublequote=False, escapechar="\\", encoding=self.encoding)
+
+            queryset = Tag.objects.exclude(wikipedia_pageid= None)
+            # the category filters are combined with OR
+            cat_filter = None
+            for cat in self.categories:
+                if cat_filter is None:
+                    cat_filter = Q(category__label = cat)
+                else:
+                    cat_filter = cat_filter | Q(category__label = cat)
+            if cat_filter is not None:
+                queryset = queryset.filter(cat_filter)
+
+            tcount = queryset.count()
+
+            print "Exporting %d tags" % (tcount)
+            writer = None
+
+            for i,t in enumerate(queryset.order_by("label")):
+
+                writer = utils.show_progress(i+1, tcount, t.label, 50, writer)
+                #normal category
+                row = [t.label,] + [cat.wp_category.label for cat in t.wp_categories.filter(hidden=False)]
+                files_path['visible']['csv'].writerow(row)
+
+                #hidden category
+                row = [t.label,] + [cat.wp_category.label for cat in t.wp_categories.filter(hidden=True)]
+                files_path['hidden']['csv'].writerow(row)
+
+                #infobox : one row per infobox, parameters flattened as name, value, name, value...
+                for infobox in t.infoboxes.all():
+                    ib_params = []
+                    for p in infobox.infoboxparameter_set.all():
+                        # param_value is nullable in the model
+                        ib_params.append(p.param_name)
+                        ib_params.append((p.param_value or u'').replace('\n',"\\n"))
+                    row = [t.label, infobox.name.strip()] + ib_params
+                    files_path['infobox']['csv'].writerow(row)
+
+        finally:
+            # files opened before an early return or an error are closed too
+            for filedef in files_path.itervalues():
+                if filedef.get('file',None):
+                    filedef['file'].close()
+
+            
+        
+        
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/management/commands/query_wikipedia_category.py	Mon Jan 30 18:20:59 2012 +0100
@@ -0,0 +1,396 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jun 7, 2011
+
+@author: ymh
+'''
+
+from django.conf import settings
+from django.core.management.base import NoArgsCommand
+from django.core.management.color import no_style
+from hdabo.models import Tag
+from hdalab.models import WpCategory, TagWpCategory, TagInfobox, InfoboxParameter
+from optparse import make_option
+from wikitools import api,wiki
+import sys
+import re
+import itertools
+from hdabo import utils
+from django.db.models import Count
+from django.db import transaction
+
+
+TYPES_MASK_DICT = {
+        u'visible': 0b001,
+        u'hidden': 0b010,
+        u'infobox': 0b100,
+        u'all': 0b111,
+    }
+
+START_PATTERN = re.compile(u"\{\{\s?Infobox\s+([^|]+)", re.M|re.U|re.I)
+END_PATTERN = re.compile(u"\{\{|\}\}", re.M|re.U)
+SPLIT_PATTERN = re.compile("\s*?\|\s*([\w]+[^=|]*)\s*=", re.U|re.M)
+DELIMITER_PATTERN = re.compile("\{{2,3}|\}{2,3}|\[\[|\]\]|\[|\]")
+COMMENT_PATTERN = re.compile("<!--.*?-->",re.U|re.M)
+
+
+
+class Command(NoArgsCommand):
+    '''
+    query and update wikipedia for tag title.
+    '''
+    options = ''
+    help = """query and update wikipedia for tag title."""
+    
+    option_list = NoArgsCommand.option_list + (
+        make_option('--all',
+            action='store_true',
+            dest='all',
+            default=False,
+            help='force all tags to be updated, not only those not yet processed'),
+        make_option('--force',
+            action='store_true',
+            dest='force',
+            default=False,
+            help='ask no questions'),
+        make_option('--random',
+            action='store_true',
+            dest='random',
+            default=False,
+            help='randomize query on tags'),
+        make_option('--site',
+            action='store',
+            type='string',
+            dest='site_url',
+            default="http://fr.wikipedia.org/w/api.php",
+            help='the url for the wikipedia site'),
+        make_option('--limit',
+            action='store',
+            type='int',
+            dest='limit',
+            default= -1,
+            help='number of tag to process'),
+        make_option('--start',
+            action='store',
+            type='int',
+            dest='start',
+            default=0,
+            help='number of tag to ignore'),
+        make_option('--type',
+            action='append',
+            dest='types',
+            type='choice',
+            choices=['visible','hidden', 'infobox', 'all'],
+            default=[],
+            help='what type of query to perform : visible : visible categories, hidden : hidden categories, infobox: infoboxes, all: all of them. This option can be assed multiple times'),
+        make_option('--use-label',
+            action='store_true',
+            dest='use_label',
+            default=False,
+            help='use label instead of pageid to query wikipedia'),
+        make_option('--tag',
+            action='append',
+            dest='tags',
+            type='string',
+            default=[],
+            help='the tag to query'),
+
+    )
+    
+    
+#    def process_wp_response(self, label, response):        
+#
+#        query_dict = response['query']
+#        # get page if multiple pages or none -> return Tag.null_result
+#        pages = query_dict.get("pages", {})
+#        if len(pages) > 1 or len(pages) == 0:
+#            return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
+#        
+#        page = pages.values()[0]
+#        
+#        if u"invalid" in page or u"missing" in page:
+#            return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
+#
+#        url = page.get(u'fullurl', None)
+#        pageid = page.get(u'pageid', None)
+#        new_label = page[u'title']
+#        
+#        if self.__is_homonymie(page):
+#            status = Tag.TAG_URL_STATUS_DICT["homonyme"]
+#        elif u"redirect" in page:
+#            status = Tag.TAG_URL_STATUS_DICT["redirection"]
+#        else:
+#            status = Tag.TAG_URL_STATUS_DICT["match"]
+#        
+#        return new_label, status, url, pageid 
+
+    def query_all_categories(self, hidden, site, pageid, use_label):
+        """
+        Query the wikipedia api for the categories of one page.
+
+        hidden : True to ask for hidden categories, False for the visible ones.
+        site : wiki object passed to api.APIRequest.
+        pageid : page id, or page title when use_label is true.
+        use_label : query by title ('titles') instead of page id ('pageids').
+
+        Returns the list of category titles, without the part of the title
+        preceding the first ":". The list is returned as collected so far
+        when an answer has no 'query' part or does not hold exactly one page.
+        """
+
+        clshow = 'hidden' if hidden else '!hidden'
+        params = {'action':'query', 'titles' if use_label else 'pageids': pageid, 'prop':'categories', 'clshow': clshow}
+
+        clcontinue = ""
+        res = []
+
+        # "" : first request, None : no continuation left, otherwise the
+        # continuation value to send with the next request.
+        while clcontinue is not None:
+            if clcontinue:
+                params['clcontinue'] = clcontinue
+
+            wpquery = api.APIRequest(site, params) #@UndefinedVariable
+            response = wpquery.query()
+
+            if self.verbosity > 1:
+                print "Query categories : " + repr(wpquery.request.get_full_url()+"?"+wpquery.request.get_data())
+                print repr(response)
+
+            query_dict = response.get('query', None)
+
+            if query_dict is None:
+                return res
+
+            pages = query_dict.get("pages", {})
+            if len(pages) != 1:
+                return res
+
+            page = pages.values()[0]
+
+            for cat in page.get('categories',[]):
+                title = cat.get('title',"")
+                title = title[title.find(":")+1:]
+                # NOTE(review): presumably skips the entry the continuation
+                # resumes from, to avoid a duplicate -- confirm against the api.
+                if title and clcontinue != ("%s|%s" % (pageid,title)):
+                    res.append(title)
+
+            clcontinue = response.get('query-continue', {}).get('categories',{}).get('clcontinue', None)
+
+        if self.verbosity > 1:
+            print "Query categories RES: "
+            print repr(res)
+
+        return res
+    
+    def process_categories(self, cat_list, hidden, tag):
+        """
+        Make sure a WpCategory exists for every label of cat_list and that
+        it is linked to tag with the given hidden flag.
+        """
+        for label in cat_list:
+            category = WpCategory.objects.get_or_create(label=label)[0]
+            TagWpCategory.objects.get_or_create(tag=tag, wp_category=category, hidden=hidden)
+            
+                
+    def query_infoboxes(self, site, pageid, use_label):
+        """
+        Fetch the content of one wikipedia page and extract its infoboxes.
+
+        site : wiki object passed to api.APIRequest.
+        pageid : page id, or page title when use_label is true.
+        use_label : query by title ('titles') instead of page id ('pageids').
+
+        Returns a tuple (revision id, [(infobox source, infobox name), ...]).
+        An empty list is returned instead when the answer has no 'query'
+        part, does not hold exactly one page, or the page has no revision.
+        """
+
+        res = []
+        params = {'action':'query', 'titles' if use_label else 'pageids': pageid, 'prop':'revisions', 'rvprop': 'ids|content'}
+        wpquery = api.APIRequest(site, params) #@UndefinedVariable
+        response = wpquery.query()
+
+        query_dict = response.get('query', None)
+
+        if query_dict is None:
+            return res
+
+        pages = query_dict.get("pages", {})
+        if len(pages) != 1:
+            return res
+
+        page = pages.values()[0]
+
+        if 'revisions' not in page or not page['revisions']:
+            return res
+
+        rev = page['revisions'][0]
+
+        content = rev['*']
+
+        start = 0
+        # number of "{{" currently open; 0 means no infobox is being read
+        depth = 0
+        current_infobox_name = None
+        current_start = 0
+
+        while start <= len(content):
+            if depth==0:
+                resm = START_PATTERN.search(content, start)
+                if resm is None:
+                    break
+                depth = 1
+                current_start = resm.start()
+                current_infobox_name = resm.group(1)
+            else:
+                resm = END_PATTERN.search(content, start)
+                if resm is None:
+                    break
+                if resm.group(0) == "{{":
+                    depth += 1
+                elif resm.group(0) == "}}":
+                    depth -= 1
+                if depth == 0:
+                    res.append((content[current_start:resm.end()], current_infobox_name))
+            # Resume right after the match. Skipping one more character would
+            # cut the second pair of "}}}}" in two and the infobox closed
+            # that way would never be found.
+            start = resm.end()
+
+        return_val = (rev['revid'],res)
+
+        if self.verbosity > 1:
+            print "Query infoboxes url: " + repr(wpquery.request.get_full_url()+"?"+wpquery.request.get_data())
+            print repr(return_val)
+
+        return return_val
+    
+    def split_infoboxes(self, src):
+        """
+        Split the body of an infobox into its parameter names and values.
+
+        src is scanned from left to right. A "| name =" sequence is taken
+        as a parameter start only when it is found outside any open
+        {{ }}, {{{ }}}, [[ ]] or [ ] pair.
+
+        Returns a list alternating names and values : [name, value, name,
+        value, ...]. Items are raw slices of src, they are not stripped.
+        The list is empty when no parameter is found.
+        """
+        
+        start = 0
+        # end of the last "| name =" match, i.e. where the current value starts
+        previous_end = 0
+        # (start, end) index pairs in src, alternating name and value
+        split_indexes = []
+        # opening delimiters not closed yet
+        delimiter_stack = []
+        while start<=len(src):            
+            resd = DELIMITER_PATTERN.search(src[start:])
+            # parameters are only looked for when no delimiter is open
+            ress = SPLIT_PATTERN.search(src[start:]) if len(delimiter_stack) == 0 else None
+            # sys.maxint stands for "not found"
+            startd = resd.start() if resd is not None else sys.maxint
+            starts = ress.start() if ress is not None else sys.maxint
+            if starts < startd:
+                # a parameter comes first : close the previous value, if any,
+                # then record the name
+                if len(split_indexes)>0:
+                    split_indexes.append((previous_end, ress.start(0)+start))
+                split_indexes.append((ress.start(1)+start, ress.end(1)+start))
+                start += ress.end(0)
+                previous_end = start
+            elif startd < sys.maxint:
+                # a delimiter comes first : push an opening one, pop on a
+                # closing one of the same kind and length as the top of the
+                # stack. Any other closing delimiter is ignored.
+                if resd.group().startswith("{") or resd.group().startswith("[") :
+                    delimiter_stack.append(resd.group())
+                elif len(delimiter_stack)>0 and ( (delimiter_stack[-1].startswith('{') and resd.group()[0] == '}') or (delimiter_stack[-1].startswith('[') and resd.group()[0] == ']') ) and len(delimiter_stack[-1]) == len(resd.group()):
+                    delimiter_stack.pop()
+                start += resd.end()
+            else:
+                # neither a parameter nor a delimiter is left
+                break
+            
+        # the last value runs to the end of src
+        if previous_end > 0:
+            split_indexes.append((previous_end,len(src)))
+        res = [src[start:end] for start,end in split_indexes]
+        return res
+
+
+
+    def process_infoboxes(self, infobox_defs, tag):
+        """
+        Store the infoboxes returned by query_infoboxes for the given tag.
+
+        infobox_defs is either empty (nothing is done) or a pair
+        (revision id, [(infobox source, infobox name), ...]). The source of
+        each infobox is saved, then every parameter found in it is created
+        or updated.
+        """
+        if not infobox_defs:
+            return
+
+        revision_id = infobox_defs[0]
+        for infobox in infobox_defs[1]:
+            name = infobox[1]
+            raw_source = infobox[0].strip(' \t\n\r')
+            tag_infobox, is_new = TagInfobox.objects.get_or_create(tag=tag, name=name, revision_id = revision_id, defaults={'source': raw_source})
+            if not is_new:
+                tag_infobox.source = raw_source
+                tag_infobox.save()
+
+            # drop the comments, the two closing characters and the infobox header
+            body = COMMENT_PATTERN.sub('',raw_source)
+            body = START_PATTERN.sub('',body[:-2]).strip()
+
+            keyvalues = self.split_infoboxes(body)
+
+            # keyvalues alternates names and values; a trailing name without value is dropped
+            for key,value in zip(keyvalues[0::2], keyvalues[1::2]):
+                stripped_value = value.strip()
+                param, is_new = InfoboxParameter.objects.get_or_create(tag_infobox=tag_infobox, param_name=key.strip(), defaults={'param_value':stripped_value})
+                if not is_new:
+                    param.param_value = stripped_value
+                    param.save()
+        
+    def handle_noargs(self, **options):
+        """
+        Query wikipedia for the categories and infoboxes of the selected tags.
+
+        Only tags having a wikipedia page id are considered. Unless --tag or
+        --all is given, only the tags without any wikipedia category yet are
+        processed. --start skips that many tags and --limit bounds the number
+        of tags processed after them. Each tag is handled in its own
+        transaction.
+        """
+
+        self.style = no_style()
+
+        interactive = options.get('interactive', True)
+
+        self.verbosity = int(options.get('verbosity', '1'))
+        use_label = options.get('use_label', False)
+
+        force = options.get('force', False)
+
+        limit = options.get("limit", -1)
+        start = options.get("start", 0)
+
+        site_url = options.get('site_url', settings.WIKIPEDIA_API_URL)
+
+        random = options.get('random', False)
+
+        # bit mask of the query types to perform, all of them by default
+        types_mask = 0
+        types_list = options.get('types', [])
+
+        if len(types_list) == 0:
+            types_mask = TYPES_MASK_DICT['all']
+        else:
+            for t in types_list:
+                types_mask |=  TYPES_MASK_DICT[t]
+
+        if self.verbosity > 1 :
+            print "types mask %s " % (bin(types_mask))
+
+        if self.verbosity > 2:
+            print "option passed : " + repr(options)
+
+
+        queryset = Tag.objects.exclude(wikipedia_pageid= None)
+
+        tag_list = options.get("tags", [])
+
+        if tag_list:
+            queryset = queryset.filter(label__in=tag_list)
+        elif not options.get('all',False):
+            queryset = queryset.annotate(wpc=Count('wp_categories')).filter(wpc = 0)
+
+        if random:
+            queryset = queryset.order_by("?")
+        else:
+            queryset = queryset.order_by("label")
+
+        if limit >= 0:
+            # limit is a number of tags, not an end index
+            queryset = queryset[start:start + limit]
+        elif start > 0:
+            queryset = queryset[start:]
+
+        if self.verbosity > 2 :
+            print "Tag Query is %s" % (queryset.query)
+
+        site = wiki.Wiki(site_url) #@UndefinedVariable
+
+
+        count = queryset.count()
+        if self.verbosity > 1:
+            print "Processing %d tags" % (count)
+
+        if not force and interactive:
+            confirm = raw_input("You have requested to query and replace the wikipedia information for %d tags.\n Are you sure you want to do this? \nType 'yes' to continue, or 'no' to cancel: " % (count))
+        else:
+            confirm = 'yes'
+
+        if confirm != "yes":
+            print "wikipedia query cancelled"
+            return
+
+
+
+        for i, tag in enumerate(queryset):
+
+            if self.verbosity > 1:
+                print "processing tag %s (%d/%d)" % (tag.label, i + 1, count)
+            else:
+                utils.show_progress(i + 1, count, tag.label, 60)
+
+            # page to query : the tag page, or the alternative page when the
+            # tag is a redirection and an alternative page id is known
+            wikipedia_pageid = tag.label if use_label else tag.wikipedia_pageid
+            if tag.url_status == Tag.TAG_URL_STATUS_DICT['redirection'] and tag.alternative_wikipedia_pageid is not None :
+                wikipedia_pageid = tag.alternative_label if use_label else tag.alternative_wikipedia_pageid
+
+            with transaction.commit_on_success():
+                if types_mask & TYPES_MASK_DICT['visible']:
+                    res = self.query_all_categories(False, site, wikipedia_pageid, use_label)
+                    self.process_categories(res, False, tag)
+
+                if types_mask & TYPES_MASK_DICT['hidden']:
+                    res = self.query_all_categories(True, site, wikipedia_pageid, use_label)
+                    self.process_categories(res, True, tag)
+
+                if types_mask & TYPES_MASK_DICT['infobox']:
+                    res = self.query_infoboxes(site, wikipedia_pageid, use_label)
+                    self.process_infoboxes(res, tag)
+            
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/migrations/0001_initial.py	Mon Jan 30 18:20:59 2012 +0100
@@ -0,0 +1,131 @@
+# encoding: utf-8
+from south.db import db
+from south.v2 import SchemaMigration
+
+class Migration(SchemaMigration):
+
+    def forwards(self, orm):
+        """
+        Create the hdalab tables (WpCategory, TagWpCategory, TagInfobox,
+        InfoboxParameter) and the unique constraints of the last three.
+        """
+        
+        # Adding model 'WpCategory'
+        db.create_table('hdalab_wpcategory', ( #@UndefinedVariable
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('label', self.gf('django.db.models.fields.CharField')(unique=True, max_length=2048)),
+        ))
+        db.send_create_signal('hdalab', ['WpCategory']) #@UndefinedVariable
+
+        # Adding model 'TagWpCategory'
+        db.create_table('hdalab_tagwpcategory', ( #@UndefinedVariable
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('tag', self.gf('django.db.models.fields.related.ForeignKey')(related_name='wp_categories', to=orm['hdabo.Tag'])),
+            ('wp_category', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['hdalab.WpCategory'])),
+            ('hidden', self.gf('django.db.models.fields.BooleanField')(default=False)),
+        ))
+        db.send_create_signal('hdalab', ['TagWpCategory']) #@UndefinedVariable
+
+        # Adding unique constraint on 'TagWpCategory', fields ['tag', 'wp_category', 'hidden']
+        db.create_unique('hdalab_tagwpcategory', ['tag_id', 'wp_category_id', 'hidden']) #@UndefinedVariable
+
+        # Adding model 'TagInfobox'
+        db.create_table('hdalab_taginfobox', ( #@UndefinedVariable
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('tag', self.gf('django.db.models.fields.related.ForeignKey')(related_name='infoboxes', to=orm['hdabo.Tag'])),
+            ('name', self.gf('django.db.models.fields.CharField')(max_length=2048)),
+            ('source', self.gf('django.db.models.fields.TextField')(null=True, blank=True)),
+            ('revision_id', self.gf('django.db.models.fields.BigIntegerField')(null=True, blank=True)),
+        ))
+        db.send_create_signal('hdalab', ['TagInfobox']) #@UndefinedVariable
+
+        # Adding unique constraint on 'TagInfobox', fields ['tag', 'name', 'revision_id']
+        db.create_unique('hdalab_taginfobox', ['tag_id', 'name', 'revision_id']) #@UndefinedVariable
+
+        # Adding model 'InfoboxParameter'
+        db.create_table('hdalab_infoboxparameter', ( #@UndefinedVariable
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('tag_infobox', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['hdalab.TagInfobox'])),
+            ('param_name', self.gf('django.db.models.fields.CharField')(max_length=2048)),
+            ('param_value', self.gf('django.db.models.fields.TextField')(null=True, blank=True)),
+        ))
+        db.send_create_signal('hdalab', ['InfoboxParameter']) #@UndefinedVariable
+
+        # Adding unique constraint on 'InfoboxParameter', fields ['tag_infobox', 'param_name']
+        db.create_unique('hdalab_infoboxparameter', ['tag_infobox_id', 'param_name']) #@UndefinedVariable
+
+
+    def backwards(self, orm):
+        """
+        Revert forwards : remove the three unique constraints, then delete
+        the four hdalab tables.
+        """
+        
+        # Removing unique constraint on 'InfoboxParameter', fields ['tag_infobox', 'param_name']
+        db.delete_unique('hdalab_infoboxparameter', ['tag_infobox_id', 'param_name']) #@UndefinedVariable
+
+        # Removing unique constraint on 'TagInfobox', fields ['tag', 'name', 'revision_id']
+        db.delete_unique('hdalab_taginfobox', ['tag_id', 'name', 'revision_id']) #@UndefinedVariable
+
+        # Removing unique constraint on 'TagWpCategory', fields ['tag', 'wp_category', 'hidden']
+        db.delete_unique('hdalab_tagwpcategory', ['tag_id', 'wp_category_id', 'hidden']) #@UndefinedVariable
+
+        # Deleting model 'WpCategory'
+        # NOTE(review): this table is still referenced by hdalab_tagwpcategory at this point -- confirm delete_table cascades.
+        db.delete_table('hdalab_wpcategory') #@UndefinedVariable
+
+        # Deleting model 'TagWpCategory'
+        db.delete_table('hdalab_tagwpcategory') #@UndefinedVariable
+
+        # Deleting model 'TagInfobox'
+        db.delete_table('hdalab_taginfobox') #@UndefinedVariable
+
+        # Deleting model 'InfoboxParameter'
+        db.delete_table('hdalab_infoboxparameter') #@UndefinedVariable
+
+
+    models = {
+        'hdabo.tag': {
+            'Meta': {'unique_together': "(('label', 'original_label', 'url_status'),)", 'object_name': 'Tag'},
+            'alias': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'alternative_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'alternative_wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'alternative_wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagCategory']", 'null': 'True', 'blank': 'True'}),
+            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
+            'dbpedia_uri': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
+            'normalized_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
+            'original_label': ('django.db.models.fields.CharField', [], {'max_length': '1024'}),
+            'popularity': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
+            'url_status': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}),
+            'wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.tagcategory': {
+            'Meta': {'object_name': 'TagCategory'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
+        },
+        'hdalab.infoboxparameter': {
+            'Meta': {'unique_together': "(('tag_infobox', 'param_name'),)", 'object_name': 'InfoboxParameter'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'param_name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+            'param_value': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'tag_infobox': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdalab.TagInfobox']"})
+        },
+        'hdalab.taginfobox': {
+            'Meta': {'unique_together': "(('tag', 'name', 'revision_id'),)", 'object_name': 'TagInfobox'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+            'revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'source': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'infoboxes'", 'to': "orm['hdabo.Tag']"})
+        },
+        'hdalab.tagwpcategory': {
+            'Meta': {'unique_together': "(('tag', 'wp_category', 'hidden'),)", 'object_name': 'TagWpCategory'},
+            'hidden': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'wp_categories'", 'to': "orm['hdabo.Tag']"}),
+            'wp_category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdalab.WpCategory']"})
+        },
+        'hdalab.wpcategory': {
+            'Meta': {'object_name': 'WpCategory'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '2048'})
+        }
+    }
+
+    complete_apps = ['hdalab']
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/models/__init__.py	Mon Jan 30 18:20:59 2012 +0100
@@ -0,0 +1,3 @@
+from hdalab.models.categories import WpCategory, InfoboxParameter, TagInfobox, TagWpCategory
+
+__all__ = ['WpCategory', 'InfoboxParameter', 'TagInfobox', 'TagWpCategory']
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/models/categories.py	Mon Jan 30 18:20:59 2012 +0100
@@ -0,0 +1,46 @@
+'''
+Created on Jan 26, 2012
+
+@author: ymh
+'''
+from django.db import models
+from hdabo.models import Tag
+
+
+class WpCategory(models.Model):
+    '''
+    A wikipedia category, identified by its unique label.
+    '''
+    label = models.CharField(max_length=2048, unique=True, blank=False, null=False)
+    
+    def __unicode__(self):
+        return unicode(self.label)
+    
+    class Meta:
+        app_label = 'hdalab'
+        verbose_name_plural = "WpCategories"
+
+class TagWpCategory(models.Model):
+    '''
+    Link between a tag and a wikipedia category. The link objects of a tag
+    are reachable through tag.wp_categories. A (tag, category, hidden)
+    combination can only be stored once.
+    '''
+    tag = models.ForeignKey(Tag, related_name="wp_categories")
+    wp_category = models.ForeignKey(WpCategory)
+    # NOTE(review): presumably tells if wikipedia reports the category as hidden for this page -- confirm
+    hidden = models.BooleanField(blank=False, null=False)
+    
+    class Meta:
+        app_label = 'hdalab'
+        unique_together = ('tag', 'wp_category', 'hidden')
+
+class TagInfobox(models.Model):
+    '''
+    An infobox attached to a tag, reachable through tag.infoboxes. It is
+    unique for a given tag, name and revision id.
+    '''
+    tag = models.ForeignKey(Tag, related_name="infoboxes")
+    name = models.CharField(max_length=2048, unique=False, blank=False, null=False)
+    # NOTE(review): presumably the raw wiki markup of the infobox -- confirm
+    source = models.TextField(unique=False, blank=True, null=True)
+    # NOTE(review): presumably the id of the wikipedia page revision the infobox was read from -- confirm
+    revision_id = models.BigIntegerField(unique=False, blank=True, null=True)
+    
+    class Meta:
+        app_label = 'hdalab'
+        unique_together = ('tag','name','revision_id')
+    
+class InfoboxParameter(models.Model):
+    '''
+    One named parameter of a TagInfobox. A parameter name can only be
+    stored once per infobox; the value may be null.
+    '''
+    tag_infobox = models.ForeignKey(TagInfobox)
+    param_name = models.CharField(max_length=2048, unique=False, blank=False, null=False)
+    param_value = models.TextField(unique=False, blank=True, null=True)
+
+    class Meta:
+        app_label = 'hdalab'
+        unique_together = ('tag_infobox','param_name')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/settings.py	Mon Jan 30 18:20:59 2012 +0100
@@ -0,0 +1,160 @@
+# -*- coding: utf-8 -*-
+# Django settings for hdalab project.
+
+DEBUG = True  # NOTE(review): debug mode is the default here; presumably hdalab.config (wildcard-imported at the end of this file) disables it in production -- confirm.
+TEMPLATE_DEBUG = DEBUG  # Bound now: a later DEBUG override from hdalab.config does not change this unless TEMPLATE_DEBUG is overridden too.
+
+ADMINS = (
+    # ('Your Name', 'your_email@example.com'),
+)
+
+MANAGERS = ADMINS  # Same (currently empty) tuple object as ADMINS; rebinding ADMINS later does not update MANAGERS.
+
+DATABASES = {  # Placeholder only: ENGINE is an incomplete backend path and all other values are empty.
+    'default': {
+        'ENGINE': 'django.db.backends.', # Add 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'.
+        'NAME': '',                      # Or path to database file if using sqlite3.
+        'USER': '',                      # Not used with sqlite3.
+        'PASSWORD': '',                  # Not used with sqlite3.
+        'HOST': '',                      # Set to empty string for localhost. Not used with sqlite3.
+        'PORT': '',                      # Set to empty string for default. Not used with sqlite3.
+    }
+}
+
+# Local time zone for this installation. Choices can be found here:
+# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name
+# although not all choices may be available on all operating systems.
+# On Unix systems, a value of None will cause Django to use the same
+# timezone as the operating system.
+# If running in a Windows environment this must be set to the same as your
+# system time zone.
+TIME_ZONE = 'America/Chicago'  # NOTE(review): Django project-template default; this project queries fr.wikipedia.org, so confirm whether a local override is intended.
+
+# Language code for this installation. All choices can be found here:
+# http://www.i18nguy.com/unicode/language-identifiers.html
+LANGUAGE_CODE = 'en-us'  # NOTE(review): also the Django project-template default -- confirm it is intended.
+
+SITE_ID = 1  # Id of the django.contrib.sites Site used by this project ('django.contrib.sites' is in INSTALLED_APPS below).
+
+# If you set this to False, Django will make some optimizations so as not
+# to load the internationalization machinery.
+USE_I18N = True
+
+# If you set this to False, Django will not format dates, numbers and
+# calendars according to the current locale
+USE_L10N = True
+
+# Absolute filesystem path to the directory that will hold user-uploaded files.
+# Example: "/home/media/media.lawrence.com/media/"
+MEDIA_ROOT = ''  # Left empty in this file.
+
+# URL that handles the media served from MEDIA_ROOT. Make sure to use a
+# trailing slash.
+# Examples: "http://media.lawrence.com/media/", "http://example.com/media/"
+MEDIA_URL = ''  # Left empty in this file.
+
+# Absolute path to the directory static files should be collected to.
+# Don't put anything in this directory yourself; store your static files
+# in apps' "static/" subdirectories and in STATICFILES_DIRS.
+# Example: "/home/media/media.lawrence.com/static/"
+STATIC_ROOT = ''  # Left empty here; NOTE(review): presumably supplied by hdalab.config where static files are collected -- confirm.
+
+# URL prefix for static files.
+# Example: "http://media.lawrence.com/static/"
+STATIC_URL = '/static/'
+
+# URL prefix for admin static files -- CSS, JavaScript and images.
+# Make sure to use a trailing slash.
+# Examples: "http://foo.com/static/admin/", "/static/admin/".
+ADMIN_MEDIA_PREFIX = '/static/admin/'  # Sits under STATIC_URL ('/static/').
+
+# Additional locations of static files
+STATICFILES_DIRS = (  # Empty: no extra static directories are configured in this file.
+    # Put strings here, like "/home/html/static" or "C:/www/django/static".
+    # Always use forward slashes, even on Windows.
+    # Don't forget to use absolute paths, not relative paths.
+)
+
+# List of finder classes that know how to find static files in
+# various locations.
+STATICFILES_FINDERS = (
+    'django.contrib.staticfiles.finders.FileSystemFinder',
+    'django.contrib.staticfiles.finders.AppDirectoriesFinder',
+#    'django.contrib.staticfiles.finders.DefaultStorageFinder',
+)
+
+# Make this unique, and don't share it with anybody.
+SECRET_KEY = 'u!@fo&-)d-hqz7==jmc2*_^__wod8$k^lb7^)y@ihbok)gn4fe'  # NOTE(review): this value is committed to version control; consider overriding it from the untracked hdalab.config.
+
+# List of callables that know how to import templates from various sources.
+TEMPLATE_LOADERS = (
+    'django.template.loaders.filesystem.Loader',
+    'django.template.loaders.app_directories.Loader',
+#     'django.template.loaders.eggs.Loader',
+)
+
+MIDDLEWARE_CLASSES = (
+    'django.middleware.common.CommonMiddleware',
+    'django.contrib.sessions.middleware.SessionMiddleware',
+    'django.middleware.csrf.CsrfViewMiddleware',
+    'django.contrib.auth.middleware.AuthenticationMiddleware',
+    'django.contrib.messages.middleware.MessageMiddleware',
+)
+
+ROOT_URLCONF = 'hdalab.urls'  # Dotted path of the project's URL configuration module (web/hdalab/urls.py).
+
+TEMPLATE_DIRS = (  # Empty: no project-level template directories are configured in this file.
+    # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
+    # Always use forward slashes, even on Windows.
+    # Don't forget to use absolute paths, not relative paths.
+)
+
+INSTALLED_APPS = (
+    'south',
+    'django.contrib.auth',
+    'django.contrib.contenttypes',
+    'django.contrib.sessions',
+    'django.contrib.sites',
+    'django.contrib.messages',
+    'django.contrib.staticfiles',
+    'django.contrib.admin',
+    'django_extensions',
+    'hdabo',  # Provides the Tag model that hdalab.models.categories imports.
+    'hdalab'  # This project's own app (models in hdalab/models/).
+)
+
+# A sample logging configuration. The only tangible logging
+# performed by this configuration is to send an email to
+# the site admins on every HTTP 500 error.
+# See http://docs.djangoproject.com/en/dev/topics/logging for
+# more details on how to customize your logging configuration.
+#LOGGING = {
+#    'version': 1,
+#    'disable_existing_loggers': False,
+#    'handlers': {
+#        'mail_admins': {
+#            'level': 'ERROR',
+#            'class': 'django.utils.log.AdminEmailHandler'
+#        }
+#    },
+#    'loggers': {
+#        'django.request': {
+#            'handlers': ['mail_admins'],
+#            'level': 'ERROR',
+#            'propagate': True,
+#        },
+#    }
+#}
+
+HAYSTACK_SITECONF = 'hdabo.search.sites'  # Points at hdabo's search modules; note that 'haystack' itself is not listed in INSTALLED_APPS in this file.
+HAYSTACK_SEARCH_ENGINE = 'hdabo.search.french_whoosh'
+#HAYSTACK_WHOOSH_PATH = os.path.abspath(BASE_DIR + "../index/").rstrip("/") + "/"
+
+WIKIPEDIA_API_URL = "http://fr.wikipedia.org/w/api.php"
+WIKIPEDIA_VERSION_PERMALINK_TEMPLATE = "http://fr.wikipedia.org/w/index.php?oldid=%s"  # %s fills the "oldid" query parameter.
+DBPEDIA_URI_TEMPLATE = "http://dbpedia.org/resource/%s"  # %s fills the resource name at the end of the URI.
+
+# Keep this import last: names defined in hdalab/config.py replace the defaults above.
+# That file is listed in .hgignore, so it is absent from a fresh checkout and this import then fails.
+from hdalab.config import * #@UnusedWildImport
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/urls.py	Mon Jan 30 18:20:59 2012 +0100
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+from django.conf.urls.defaults import patterns, include, url
+from django.contrib import admin
+
+# The admin URLs are enabled below, so admin.autodiscover() must run first:
+# it imports each installed app's admin module, which registers its models.
+admin.autodiscover()
+
+urlpatterns = patterns('',
+    # Examples:
+    # url(r'^$', 'hdalab.views.home', name='home'),
+    # url(r'^hdalab/', include('hdalab.foo.urls')),
+
+    # Uncomment the admin/doc line below to enable admin documentation:
+    # url(r'^admin/doc/', include('django.contrib.admindocs.urls')),
+
+    # Admin site (the `admin` module is imported at the top of this file).
+    url(r'^admin/', include(admin.site.urls)),
+    
+    
+)