- add popularity
- calculate dbpedia_uri
- display dbpedia uri
- add manual_order
- various corrections
--- a/.settings/org.eclipse.core.resources.prefs Mon Jun 20 15:49:22 2011 +0200
+++ b/.settings/org.eclipse.core.resources.prefs Wed Jun 22 01:00:47 2011 +0200
@@ -1,4 +1,4 @@
-#Fri Jun 17 01:31:06 CEST 2011
+#Fri Jun 17 17:33:03 CEST 2011
eclipse.preferences.version=1
encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/haystack/backends/__init__.py=utf-8
encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/fields.py=utf-8
@@ -8,8 +8,9 @@
encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/wiki.py=utf-8
encoding//web/hdabo/fields.py=utf-8
encoding//web/hdabo/forms.py=utf-8
-encoding//web/hdabo/management/commands/importcsv.py=utf-8
-encoding//web/hdabo/management/commands/querywikipedia.py=utf-8
+encoding//web/hdabo/management/commands/import_csv.py=utf-8
+encoding//web/hdabo/management/commands/import_tag_popularity.py=utf-8
+encoding//web/hdabo/management/commands/query_wikipedia.py=utf-8
encoding//web/hdabo/models.py=utf-8
encoding//web/hdabo/search/french_whoosh_backend.py=utf-8
encoding//web/hdabo/tests/models.py=utf-8
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sql/create_db.sql Wed Jun 22 01:00:47 2011 +0200
@@ -0,0 +1,7 @@
+CREATE DATABASE hdabo
+ WITH ENCODING='UTF8'
+ OWNER=iri
+ TEMPLATE=template0
+ LC_COLLATE='fr_FR.UTF-8'
+ LC_CTYPE='fr_FR.UTF-8'
+ CONNECTION LIMIT=-1;
\ No newline at end of file
Binary file web/hdabo/fixtures/datasheet_10.yaml.bz2 has changed
Binary file web/hdabo/fixtures/datasheet_347.yaml.bz2 has changed
Binary file web/hdabo/fixtures/initial_data.yaml.bz2 has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdabo/management/commands/import_csv.py Wed Jun 22 01:00:47 2011 +0200
@@ -0,0 +1,257 @@
+# -*- coding: utf-8 -*-
+'''
+Created on May 25, 2011
+
+@author: ymh
+'''
+#Auteur,Chemin,Comment,Controle,Datcre,Datmaj,Desc,Domaine,Format,ID,Insee,Org,Org_Home,OrgID,Periode1,Periode2,Periode3,Satut,Sousdom,Tag,Theme2,Theme3,Titre,Url,Vignette,Ville
+#"Auteur","Chemin","Comment","Controle","Datcre","Datmaj","Desc","Domaine","Format","ID","Insee","Org","Org_Home","OrgID","Periode1","Periode2","Periode3","Satut","Sousdom","Tag","Theme2","Theme3","Titre","Url","Vignette","Ville",
+
+from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+from hdabo.models import (Author, Datasheet, DocumentFormat, Domain, Organisation,
+ Tag, TaggedSheet, TimePeriod, Location)
+from hdabo.wp_utils import normalize_tag
+from optparse import make_option
+import csv
+import datetime
+import math
+import sys
+
+class Command(BaseCommand):
+ '''
+ Command to import csvfile
+ '''
+ args = '<path_to_csv_file path_to_csv_file ...>'
+ options = '[--ignore-existing] [--lines] [--encoding]'
+ help = """Import of a csv file for hdabo
+Options:
+ --ignore-existing : ignore existing datasheets
+ --lines : max number of lines to load (for each file). 0 means all.
+ --encoding : files encoding. default to latin-1"""
+
+ option_list = BaseCommand.option_list + (
+ make_option('--encoding',
+ action='store',
+ type='string',
+ dest='encoding',
+ default="latin-1",
+ help='fix the file encoding. default to latin-1'),
+ make_option('--delimiter',
+ action='store',
+ type='string',
+ dest='delimiter',
+ default=";",
+ help='csv file delimiter'),
+ make_option('--dialect',
+ action='store',
+ type='string',
+ dest='dialect',
+ default="excel",
+ help='csv dialect'),
+ make_option('--fieldnames',
+ action='store',
+ type='string',
+ dest='fieldnames',
+ default=None,
+ help='fields list (comma separated)'),
+ make_option('--lines',
+ action='store',
+ type='int',
+ dest='lines',
+ default=0,
+ help='Number of lines to read. 0 means all.'),
+ make_option('--ignore-existing',
+ action='store_true',
+ dest='ignore_existing',
+ default=False,
+ help='force insertion'),
+
+ )
+
+ def show_progress(self, current_line, total_line, width):
+
+ percent = (float(current_line) / float(total_line)) * 100.0
+
+ marks = math.floor(width * (percent / 100.0))
+ spaces = math.floor(width - marks)
+
+ loader = '[' + ('=' * int(marks)) + (' ' * int(spaces)) + ']'
+
+ sys.stdout.write("%s %d%% %d/%d\r" % (loader, percent, current_line - 1, total_line - 1)) #takes the header into account
+ if percent >= 100:
+ sys.stdout.write("\n")
+ sys.stdout.flush()
+
+
+ def create_domain_period(self, row_value, klass, school_period):
+ res_list = []
+ if not row_value:
+ return res_list
+ for label_str in [dstr.strip() for dstr in row_value.split('\x0b')]:
+ if label_str:
+ res_obj, created = klass.objects.get_or_create(label=label_str, school_period=school_period, defaults={"label":label_str, "school_period":school_period}) #@UnusedVariable
+ res_list.append(res_obj)
+ return res_list
+
+ def create_datasheet(self, row):
+
+ if self.ignore_existing and Datasheet.objects.filter(hda_id=row[u"ID"]).count() > 0:
+ return
+
+ author_str = row[u'Auteur']
+ if author_str:
+ author_array = author_str.split(" ")
+ if len(author_array) == 0:
+ firstname = ""
+ lastname = ""
+ elif len(author_array) == 1:
+ firstname = ""
+ lastname = author_array[0]
+ elif len(author_array) == 2:
+ firstname = author_array[0]
+ lastname = author_array[1]
+
+ author, created = Author.objects.get_or_create(hda_id=author_str, defaults={"firstname":firstname, "lastname":lastname}) #@UnusedVariable
+ else:
+ author = None
+
+ org_str = row[u"Org"]
+ if org_str:
+ url_str = row[u'Org_Home']
+ if url_str is not None:
+ url_str = url_str.strip()
+ org, created = Organisation.objects.get_or_create(hda_id=org_str, defaults={"name":org_str, "website" : url_str}) #@UnusedVariable
+ else:
+ org = None
+
+ town_str = row[u"Ville"]
+ if town_str:
+ insee_str = row[u'Insee'].strip() if row[u'Insee'] else row[u'Insee']
+ if len(insee_str) > 5:
+ insee_str = ""
+ loc, created = Location.objects.get_or_create(insee=insee_str, defaults={"name": town_str, "insee": insee_str}) #@UnusedVariable
+ else:
+ loc = None
+
+ format_str = row[u"Format"]
+ if format_str:
+ format, created = DocumentFormat.objects.get_or_create(label=format_str, defaults={"label": format_str}) #@UnusedVariable
+ else:
+ format = None
+
+ domains = self.create_domain_period(row[u"Domaine"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Global'])
+
+ primary_periods = self.create_domain_period(row[u"Periode1"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Primaire'])
+ college_periods = self.create_domain_period(row[u"Periode2"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Collège'])
+ highschool_periods = self.create_domain_period(row[u"Periode3"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Lycée'])
+
+ primary_themes = self.create_domain_period(row[u"Sousdom"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Primaire'])
+ college_themes = self.create_domain_period(row[u"Theme2"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Collège'])
+ highschool_themes = self.create_domain_period(row[u"Theme3"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Lycée'])
+
+ url = row[u"Url"]
+ if url is not None:
+ url = url.strip()
+
+ datasheet = Datasheet.objects.create(
+ hda_id=row[u"ID"],
+ author=author,
+ organisation=org,
+ title=row[u"Titre"],
+ description=row[u"Desc"],
+ url=url,
+ town=loc,
+ format=format,
+ original_creation_date=datetime.datetime.strptime(row[u"Datcre"], "%d/%m/%Y").date(),
+ original_modification_date=datetime.datetime.strptime(row[u"Datmaj"], "%d/%m/%Y").date(),
+ validated=False
+ )
+
+ datasheet.save()
+
+ datasheet.domains = domains
+ datasheet.primary_periods = primary_periods
+ datasheet.college_periods = college_periods
+ datasheet.highschool_periods = highschool_periods
+ datasheet.primary_themes = primary_themes
+ datasheet.college_themes = college_themes
+ datasheet.highschool_themes = highschool_themes
+
+
+ if row[u'Tag']:
+ for i, tag in enumerate([t.strip() for t in row[u'Tag'].split(u";")]):
+ if len(tag) == 0:
+ continue
+ tag_label = normalize_tag(tag)
+ tag_obj, created = Tag.objects.get_or_create(label__iexact=tag_label, defaults={'label':tag_label, 'original_label':tag}) #@UnusedVariable
+ tagged_ds = TaggedSheet(datasheet=datasheet, tag=tag_obj, original_order=i + 1, order=i + 1)
+ tagged_ds.save()
+
+
+ def handle(self, *args, **options):
+
+ if len(args) == 0:
+            raise CommandError("Give at least one csv file to import")
+
+ self.encoding = options.get('encoding', "latin-1")
+ lines = options.get('lines', 0)
+ self.ignore_existing = options.get('ignore_existing', False)
+ fieldnames = options.get('fieldnames', None)
+
+ transaction.commit_unless_managed()
+ transaction.enter_transaction_management()
+ transaction.managed(True)
+
+ try:
+ for csv_path in args:
+ try:
+ print "Processing %s " % (csv_path)
+ with open(csv_path, 'rU') as csv_file:
+
+ # get the number of lines if necessary
+ if not lines:
+ for i, l in enumerate(csv_file): #@UnusedVariable
+ pass
+ total_line = i + 1
+ if fieldnames:
+ total_line = total_line + 1
+ csv_file.seek(0)
+ else:
+ total_line = lines + 1
+
+ delimiter = options.get('delimiter', ";")
+ if delimiter == "TAB" or delimiter == "\\t":
+ delimiter = '\t'
+
+ dr_kwargs = {'delimiter':delimiter}
+ if fieldnames is not None:
+ dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
+ dialect = options.get('dialect', "excel")
+ if dialect is not None:
+ dr_kwargs['dialect'] = dialect
+
+ reader = csv.DictReader(csv_file, **dr_kwargs)
+
+ for j, row in enumerate(reader):
+ if lines and j >= lines:
+ break
+ line_num = reader.line_num if fieldnames is None else reader.line_num + 1
+ self.show_progress(line_num, total_line, 60)
+ def safe_decode(val, encoding):
+ if val:
+ return val.decode(encoding)
+ else:
+ return val
+
+ row = dict([(safe_decode(key, self.encoding), safe_decode(value, self.encoding)) for key, value in row.items()])
+ self.create_datasheet(row)
+
+ transaction.commit()
+ except Exception:
+ transaction.rollback()
+ raise
+ finally:
+ print('')
+ finally:
+ transaction.leave_transaction_management()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdabo/management/commands/import_tag_popularity.py Wed Jun 22 01:00:47 2011 +0200
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jun 17, 2011
+
+@author: ymh
+
+command to import tag popularity
+
+'''
+
+from django.core.management.base import BaseCommand, CommandError
+from hdabo.models import Tag
+from optparse import make_option
+import csv
+import math
+import sys
+
+
+class Command(BaseCommand):
+ '''
+    Command to import tag popularity from a csv file
+ '''
+ args = '<path_to_csv_file path_to_csv_file ...>'
+    options = '[--lines] [--encoding]'
+ help = """Import of a tag popularity file for hdabo
+Options:
+    --delimiter : csv file delimiter. default to ";"
+ --lines : max number of lines to load (for each file). 0 means all.
+ --encoding : files encoding. default to latin-1"""
+
+ option_list = BaseCommand.option_list + (
+ make_option('--encoding',
+ action='store',
+ type='string',
+ dest='encoding',
+ default="latin-1",
+ help='fix the file encoding. default to latin-1'),
+ make_option('--delimiter',
+ action='store',
+ type='string',
+ dest='delimiter',
+ default=";",
+ help='csv file delimiter'),
+ make_option('--dialect',
+ action='store',
+ type='string',
+ dest='dialect',
+ default="excel",
+ help='csv dialect'),
+ make_option('--fieldnames',
+ action='store',
+ type='string',
+ dest='fieldnames',
+ default="label,popularity",
+ help='fields list (comma separated)'),
+ make_option('--lines',
+ action='store',
+ type='int',
+ dest='lines',
+ default=0,
+ help='Number of lines to read. 0 means all.'),
+
+ )
+
+ def show_progress(self, current_line, total_line, width):
+
+ percent = (float(current_line) / float(total_line)) * 100.0
+
+ marks = math.floor(width * (percent / 100.0))
+ spaces = math.floor(width - marks)
+
+ loader = '[' + ('=' * int(marks)) + (' ' * int(spaces)) + ']'
+
+ sys.stdout.write("%s %d%% %d/%d\r" % (loader, percent, current_line - 1, total_line - 1)) #takes the header into account
+ if percent >= 100:
+ sys.stdout.write("\n")
+ sys.stdout.flush()
+
+ def handle(self, *args, **options):
+
+ if len(args) == 0:
+ raise CommandError("Give one csv file to import")
+ elif len(args) > 1:
+ raise CommandError("Only one file can be imported")
+
+ self.encoding = options.get('encoding', "latin-1")
+ lines = options.get('lines', 0)
+ fieldnames = options.get('fieldnames', "label,popularity")
+
+ csv_path = args[0]
+
+ print("Processing %s " % (csv_path))
+
+ with open(csv_path, 'rU') as csv_file:
+ # get the number of lines if necessary
+ if not lines:
+ for i, l in enumerate(csv_file): #@UnusedVariable
+ pass
+ total_line = i + 1
+ if fieldnames:
+ total_line = total_line + 1
+ csv_file.seek(0)
+ else:
+ total_line = lines + 1
+
+ delimiter = options.get('delimiter', ";")
+ if delimiter == "TAB" or delimiter == "\\t":
+ delimiter = '\t'
+ dr_kwargs = {'delimiter':delimiter}
+ if fieldnames is not None:
+ dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
+ dialect = options.get('dialect', "excel")
+ if dialect is not None:
+ dr_kwargs['dialect'] = dialect
+
+ reader = csv.DictReader(csv_file, **dr_kwargs)
+
+ for j, row in enumerate(reader):
+ if lines and j >= lines:
+ break
+ line_num = reader.line_num if fieldnames is None else reader.line_num + 1
+ self.show_progress(line_num, total_line, 60)
+ def safe_decode(val, encoding):
+ if val:
+ return val.decode(encoding)
+ else:
+ return val
+
+ row = dict([(safe_decode(key, self.encoding), safe_decode(value, self.encoding)) for key, value in row.items()])
+
+ label = row['label'].strip()
+
+ if not label:
+ continue
+
+ for tag in Tag.objects.filter(label__iexact=label):
+ tag.popularity = int(row['popularity'])
+ tag.save()
--- a/web/hdabo/management/commands/importcsv.py Mon Jun 20 15:49:22 2011 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,253 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Created on May 25, 2011
-
-@author: ymh
-'''
-#Auteur,Chemin,Comment,Controle,Datcre,Datmaj,Desc,Domaine,Format,ID,Insee,Org,Org_Home,OrgID,Periode1,Periode2,Periode3,Satut,Sousdom,Tag,Theme2,Theme3,Titre,Url,Vignette,Ville
-#"Auteur","Chemin","Comment","Controle","Datcre","Datmaj","Desc","Domaine","Format","ID","Insee","Org","Org_Home","OrgID","Periode1","Periode2","Periode3","Satut","Sousdom","Tag","Theme2","Theme3","Titre","Url","Vignette","Ville",
-
-from django.core.management.base import BaseCommand, CommandError
-from django.db import transaction
-from hdabo.models import (Author, Datasheet, DocumentFormat, Domain, Organisation,
- Tag, TaggedSheet, TimePeriod, Location)
-from hdabo.wp_utils import normalize_tag
-from optparse import make_option
-import csv
-import datetime
-import math
-import sys
-
-class Command(BaseCommand):
- '''
- Command to import csvfile
- '''
- args = '<path_to_csv_file path_to_csv_file ...>'
- options = '[--ignore-existing] [--lines] [--encoding]'
- help = """Import of a csv file for hdabo
-Options:
- --ignore-existing : ignore existing datasheets
- --lines : max number of lines to load (for each file). 0 means all.
- --encoding : files encoding. default to latin-1"""
-
- option_list = BaseCommand.option_list + (
- make_option('--encoding',
- action='store',
- type='string',
- dest='encoding',
- default="latin-1",
- help='fix the file encoding. default to latin-1'),
- make_option('--delimiter',
- action='store',
- type='string',
- dest='delimiter',
- default=";",
- help='csv file delimiter'),
- make_option('--dialect',
- action='store',
- type='string',
- dest='dialect',
- default="excel",
- help='csv dialect'),
- make_option('--fieldnames',
- action='store',
- type='string',
- dest='fieldnames',
- default=None,
- help='fields list (comma separated)'),
- make_option('--lines',
- action='store',
- type='int',
- dest='lines',
- default=0,
- help='Number of lines to read. 0 means all.'),
- make_option('--ignore-existing',
- action='store_true',
- dest='ignore_existing',
- default=False,
- help='force insertion'),
-
- )
-
- def show_progress(self, current_line, total_line, width):
-
- percent = (float(current_line) / float(total_line)) * 100.0
-
- marks = math.floor(width * (percent / 100.0))
- spaces = math.floor(width - marks)
-
- loader = '[' + ('=' * int(marks)) + (' ' * int(spaces)) + ']'
-
- sys.stdout.write("%s %d%% %d/%d\r" % (loader, percent, current_line - 1, total_line - 1)) #takes the header into account
- if percent >= 100:
- sys.stdout.write("\n")
- sys.stdout.flush()
-
-
- def create_domain_period(self, row_value, klass, school_period):
- res_list = []
- if not row_value:
- return res_list
- for label_str in [dstr.strip() for dstr in row_value.split('\x0b')]:
- if label_str:
- res_obj, created = klass.objects.get_or_create(label=label_str, school_period=school_period, defaults={"label":label_str, "school_period":school_period}) #@UnusedVariable
- res_list.append(res_obj)
- return res_list
-
- def create_datasheet(self, row):
-
- if self.ignore_existing and Datasheet.objects.filter(hda_id=row[u"ID"]).count() > 0:
- return
-
- author_str = row[u'Auteur']
- if author_str:
- author_array = author_str.split(" ")
- if len(author_array) == 0:
- firstname = ""
- lastname = ""
- elif len(author_array) == 1:
- firstname = ""
- lastname = author_array[0]
- elif len(author_array) == 2:
- firstname = author_array[0]
- lastname = author_array[1]
-
- author, created = Author.objects.get_or_create(hda_id=author_str, defaults={"firstname":firstname, "lastname":lastname}) #@UnusedVariable
- else:
- author = None
-
- org_str = row[u"Org"]
- if org_str:
- url_str = row[u'Org_Home']
- if url_str is not None:
- url_str = url_str.strip()
- org, created = Organisation.objects.get_or_create(hda_id=org_str, defaults={"name":org_str, "website" : url_str}) #@UnusedVariable
- else:
- org = None
-
- town_str = row[u"Ville"]
- if town_str:
- insee_str = row[u'Insee'].strip() if row[u'Insee'] else row[u'Insee']
- if len(insee_str) > 5:
- insee_str = ""
- loc, created = Location.objects.get_or_create(insee=insee_str, defaults={"name": town_str, "insee": insee_str}) #@UnusedVariable
- else:
- loc = None
-
- format_str = row[u"Format"]
- if format_str:
- format, created = DocumentFormat.objects.get_or_create(label=format_str, defaults={"label": format_str}) #@UnusedVariable
- else:
- format = None
-
- domains = self.create_domain_period(row[u"Domaine"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Global'])
-
- primary_periods = self.create_domain_period(row[u"Periode1"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Primaire'])
- college_periods = self.create_domain_period(row[u"Periode2"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Collège'])
- highschool_periods = self.create_domain_period(row[u"Periode3"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Lycée'])
-
- primary_themes = self.create_domain_period(row[u"Sousdom"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Primaire'])
- college_themes = self.create_domain_period(row[u"Theme2"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Collège'])
- highschool_themes = self.create_domain_period(row[u"Theme3"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Lycée'])
-
- url = row[u"Url"]
- if url is not None:
- url = url.strip()
-
- datasheet = Datasheet.objects.create(
- hda_id=row[u"ID"],
- author=author,
- organisation=org,
- title=row[u"Titre"],
- description=row[u"Desc"],
- url=url,
- town=loc,
- format=format,
- original_creation_date=datetime.datetime.strptime(row[u"Datcre"], "%d/%m/%Y").date(),
- original_modification_date=datetime.datetime.strptime(row[u"Datmaj"], "%d/%m/%Y").date(),
- validated=False
- )
-
- datasheet.save()
-
- datasheet.domains = domains
- datasheet.primary_periods = primary_periods
- datasheet.college_periods = college_periods
- datasheet.highschool_periods = highschool_periods
- datasheet.primary_themes = primary_themes
- datasheet.college_themes = college_themes
- datasheet.highschool_themes = highschool_themes
-
-
- if row[u'Tag']:
- for i, tag in enumerate([t.strip() for t in row[u'Tag'].split(u";")]):
- if len(tag) == 0:
- continue
- tag_label = normalize_tag(tag)
- tag_obj, created = Tag.objects.get_or_create(label__iexact=tag_label, defaults={'label':tag_label, 'original_label':tag}) #@UnusedVariable
- tagged_ds = TaggedSheet(datasheet=datasheet, tag=tag_obj, original_order=i + 1, order=i + 1)
- tagged_ds.save()
-
-
- def handle(self, *args, **options):
-
- if len(args) == 0:
- raise CommandError("Gives at lat one csv file to import")
-
- self.encoding = options.get('encoding', "latin-1")
- lines = options.get('lines', 0)
- self.ignore_existing = options.get('ignore_existing', False)
- fieldnames = options.get('fieldnames', None)
-
- transaction.commit_unless_managed()
- transaction.enter_transaction_management()
- transaction.managed(True)
-
- try:
- for csv_path in args:
- try:
- print "Processing %s " % (csv_path)
- with open(csv_path, 'rU') as csv_file:
-
- # get the number of lines if necessary
- if not lines:
- for i, l in enumerate(csv_file): #@UnusedVariable
- pass
- total_line = i + 1
- if fieldnames:
- total_line = total_line + 1
- csv_file.seek(0)
- else:
- total_line = lines + 1
-
- dr_kwargs = {'delimiter':options.get('delimiter', ";")}
- if fieldnames is not None:
- dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
- dialect = options.get('dialect', "excel")
- if dialect is not None:
- dr_kwargs['dialect'] = dialect
-
- reader = csv.DictReader(csv_file, **dr_kwargs)
-
- for j, row in enumerate(reader):
- if lines and j >= lines:
- break
- line_num = reader.line_num if fieldnames is None else reader.line_num + 1
- self.show_progress(line_num, total_line, 60)
- def safe_decode(val, encoding):
- if val:
- return val.decode(encoding)
- else:
- return val
-
- row = dict([(safe_decode(key, self.encoding), safe_decode(value, self.encoding)) for key, value in row.items()])
- self.create_datasheet(row)
-
- transaction.commit()
- except Exception:
- transaction.rollback()
- raise
- finally:
- print('')
- finally:
- transaction.leave_transaction_management()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdabo/management/commands/order_tags.py Wed Jun 22 01:00:47 2011 +0200
@@ -0,0 +1,105 @@
+'''
+Created on Jun 7, 2011
+
+@author: ymh
+'''
+
+from django.core.management.base import NoArgsCommand
+from django.core.management.color import no_style
+from haystack.constants import DJANGO_ID
+from haystack.query import SearchQuerySet
+from hdabo.models import Datasheet
+import math
+import sys
+from optparse import make_option
+from django.db import transaction
+
+
+class Command(NoArgsCommand):
+ '''
+ Command to calculate the order of tags based on indexation
+ recalculate all tags. Will ask for confirmation
+ '''
+
+ args = ''
+ options = '-f : force '
+ help = "calculate the order of tags based on indexation recalculate all tags. Will ask for confirmation"
+
+ option_list = NoArgsCommand.option_list + (
+ make_option('-f', '--force',
+ action='store_true',
+ dest='force',
+ default=False,
+ help='force reordering of all datasheets'),
+ )
+
+
+ def show_progress(self, current_line, total_line, width):
+
+ percent = (float(current_line) / float(total_line)) * 100.0
+
+ marks = math.floor(width * (percent / 100.0))
+ spaces = math.floor(width - marks)
+
+ loader = u'[' + (u'=' * int(marks)) + (u' ' * int(spaces)) + u']'
+
+ sys.stdout.write(u"%s %d%% %d/%d\r" % (loader, percent, current_line, total_line)) #takes the header into account
+ if percent >= 100:
+ sys.stdout.write("\n")
+ sys.stdout.flush()
+
+
+ def handle_noargs(self, **options):
+
+ self.style = no_style()
+
+ interactive = options.get('interactive', True)
+ force = options.get('force', True)
+
+ if interactive:
+ confirm = raw_input("""You have requested to recalculate the index order of all the tags.
+This will process all the tags in %s datasheets. Are you sure you want to do this ?
+ Type 'yes' to continue, or 'no' to cancel: """ % ("all" if force else "not validated"))
+ else:
+ confirm = 'yes'
+
+ if confirm != "yes":
+ print "Tag reordering cancelled"
+ return
+
+ if force:
+ queryset = Datasheet.objects.all()
+ else:
+ queryset = Datasheet.objects.filter(validated=False, manual_order=False)
+ total = queryset.count()
+
+ transaction.commit_unless_managed()
+ transaction.enter_transaction_management()
+ transaction.managed(True)
+
+ try:
+ for i, ds in enumerate(queryset):
+ self.show_progress(i + 1, total, 60)
+ ts_list = []
+ for ts in ds.taggedsheet_set.all():
+ kwargs = {DJANGO_ID + "__exact": unicode(ds.pk)}
+ results = SearchQuerySet().filter(title=ts.tag.label).filter_or(description=ts.tag.label).filter(**kwargs)
+ if len(results) > 0:
+ ts.index_note = results[0].score
+ ts.save()
+ ts_list.append(ts)
+ ts_list.sort(key=lambda t: (-t.index_note, t.order))
+ for i, ts in enumerate(ts_list):
+ ts.order = i + 1
+ ts.save()
+ if ds.manual_order:
+ ds.manual_order = False
+ ds.save()
+ transaction.commit()
+ except:
+ transaction.rollback()
+ raise
+ finally:
+ transaction.leave_transaction_management()
+
+
--- a/web/hdabo/management/commands/ordertags.py Mon Jun 20 15:49:22 2011 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,74 +0,0 @@
-'''
-Created on Jun 7, 2011
-
-@author: ymh
-'''
-
-from django.core.management.base import NoArgsCommand
-from django.core.management.color import no_style
-from haystack.constants import DJANGO_ID
-from haystack.query import SearchQuerySet
-from hdabo.models import Datasheet
-import math
-import sys
-
-
-class Command(NoArgsCommand):
- '''
- Command to calculate the order of tags based on indexation
- recalculate all tags. Will ask for confirmation
- '''
-
- args = ''
- options = ''
- help = "calculate the order of tags based on indexation recalculate all tags. Will ask for confirmation"
-
- def show_progress(self, current_line, total_line, width):
-
- percent = (float(current_line) / float(total_line)) * 100.0
-
- marks = math.floor(width * (percent / 100.0))
- spaces = math.floor(width - marks)
-
- loader = u'[' + (u'=' * int(marks)) + (u' ' * int(spaces)) + u']'
-
- sys.stdout.write(u"%s %d%% %d/%d\r" % (loader, percent, current_line, total_line)) #takes the header into account
- if percent >= 100:
- sys.stdout.write("\n")
- sys.stdout.flush()
-
-
- def handle_noargs(self, **options):
-
- self.style = no_style()
-
- interactive = options.get('interactive', True)
-
- if interactive:
- confirm = raw_input("""You have requested to recalculate the index order of all the tags.
-This will process all the tags in all datasheets. Are you sure you want to do this ?
- Type 'yes' to continue, or 'no' to cancel: """)
- else:
- confirm = 'yes'
-
- if confirm != "yes":
- print "Tag reordering cancelled"
- return
-
- total = Datasheet.objects.all().count()
-
- for i, ds in enumerate(Datasheet.objects.all()):
- self.show_progress(i + 1, total, 60)
- ts_list = []
- for ts in ds.taggedsheet_set.all():
- kwargs = {DJANGO_ID + "__exact": unicode(ds.pk)}
- results = SearchQuerySet().filter(title=ts.tag.label).filter_or(description=ts.tag.label).filter(**kwargs)
- if len(results) > 0:
- ts.index_note = results[0].score
- ts.save()
- ts_list.append(ts)
- ts_list.sort(key=lambda t: (-t.index_note, t.order))
- for i, ts in enumerate(ts_list):
- ts.order = i + 1
- ts.save()
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdabo/management/commands/query_wikipedia.py Wed Jun 22 01:00:47 2011 +0200
@@ -0,0 +1,174 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jun 7, 2011
+
+@author: ymh
+'''
+
+from django.conf import settings
+from django.core.management.base import NoArgsCommand
+from django.core.management.color import no_style
+from hdabo.models import Tag
+from hdabo.wp_utils import process_tag
+from optparse import make_option
+from wikitools import wiki
+import math
+import sys
+
+
+
+class Command(NoArgsCommand):
+ '''
+ query and update wikipedia for tag title.
+ '''
+ options = ''
+ help = """query and update wikipedia for tag title."""
+
+ option_list = NoArgsCommand.option_list + (
+ make_option('--force',
+ action='store_true',
+ dest='force',
+ default=False,
+ help='force all tags to be updated, not only those not yet processed'),
+ make_option('--random',
+ action='store_true',
+ dest='random',
+ default=False,
+ help='randomize query on tags'),
+ make_option('--site',
+ action='store',
+ type='string',
+ dest='site_url',
+ default="http://fr.wikipedia.org/w/api.php",
+ help='the url for the wikipedia site'),
+ make_option('--limit',
+ action='store',
+ type='int',
+ dest='limit',
+ default= -1,
+ help='number of tag to process'),
+ make_option('--start',
+ action='store',
+ type='int',
+ dest='start',
+ default=0,
+ help='number of tag to ignore'),
+ )
+
+ def __is_homonymie(self, page_dict):
+ for cat in page_dict.get(u"categories", []):
+ if u'Catégorie:Homonymie' in cat.get(u"title", u"") or u'Category:Disambiguation pages' in cat.get(u"title", u""):
+ return True
+ return False
+
+
+ def process_wp_response(self, label, response):
+
+
+ query_dict = response['query']
+ # get page if multiple pages or none -> return Tag.null_result
+ pages = query_dict.get("pages", {})
+ if len(pages) > 1 or len(pages) == 0:
+ return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
+
+ page = pages.values()[0]
+
+ if u"invalid" in page or u"missing" in page:
+ return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
+
+ url = page.get(u'fullurl', None)
+ pageid = page.get(u'pageid', None)
+ new_label = page[u'title']
+
+ if self.__is_homonymie(page):
+ status = Tag.TAG_URL_STATUS_DICT["homonyme"]
+ elif u"redirect" in page:
+ status = Tag.TAG_URL_STATUS_DICT["redirection"]
+ else:
+ status = Tag.TAG_URL_STATUS_DICT["match"]
+
+ return new_label, status, url, pageid
+
+ def show_progress(self, current_line, total_line, label, width):
+
+ percent = (float(current_line) / float(total_line)) * 100.0
+
+ marks = math.floor(width * (percent / 100.0))
+ spaces = math.floor(width - marks)
+
+ loader = u'[' + (u'=' * int(marks)) + (u' ' * int(spaces)) + u']'
+
+ sys.stdout.write(u"%s %d%% %d/%d - %s\r" % (loader, percent, current_line - 1, total_line - 1, repr(label))) #takes the header into account
+ if percent >= 100:
+ sys.stdout.write("\n")
+ sys.stdout.flush()
+
+ def handle_noargs(self, **options):
+
+ self.style = no_style()
+
+ interactive = options.get('interactive', True)
+
+ verbosity = int(options.get('verbosity', '1'))
+
+ force = options.get('force', False)
+
+ limit = options.get("limit", -1)
+ start = options.get("start", 0)
+
+ site_url = options.get('site_url', settings.WIKIPEDIA_API_URL)
+
+ random = options.get('random', False)
+
+ if verbosity > 2:
+ print "option passed : " + repr(options)
+
+ if force and interactive:
+ confirm = raw_input("""You have requested to query and replace the wikipedia information for all datasheets.
+Are you sure you want to do this ?
+ Type 'yes' to continue, or 'no' to cancel: """)
+ else:
+ confirm = 'yes'
+
+ if confirm != "yes":
+ print "wikipedia query cancelled"
+ return
+
+ if force:
+ queryset = Tag.objects.all()
+ else:
+ queryset = Tag.objects.filter(url_status=None)
+
+ if random:
+ queryset = queryset.order_by("?")
+ else:
+ queryset = queryset.order_by("label")
+
+ if limit >= 0:
+ queryset = queryset[start:limit]
+ else:
+ queryset = queryset[start:]
+
+
+ if verbosity > 2 :
+ print "Tag Query is %s" % (queryset.query)
+
+ site = wiki.Wiki(site_url) #@UndefinedVariable
+
+
+ count = queryset.count()
+ if verbosity > 1:
+ print "Processing %d tags" % (count)
+
+
+
+ for i, tag in enumerate(queryset):
+
+ if verbosity > 1:
+ print "processing tag %s (%d/%d)" % (tag.label, i + 1, count)
+ else:
+ self.show_progress(i + 1, count, tag.label, 60)
+
+ process_tag(site, tag, verbosity)
+
+
--- a/web/hdabo/management/commands/querywikipedia.py Mon Jun 20 15:49:22 2011 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,193 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Created on Jun 7, 2011
-
-@author: ymh
-'''
-
-from django.conf import settings
-from django.core.management.base import NoArgsCommand
-from django.core.management.color import no_style
-from hdabo.models import Tag
-from hdabo.wp_utils import query_wikipedia_title
-from optparse import make_option
-from wikitools import wiki
-import math
-import sys
-
-
-def process_tag(site, tag, verbosity):
- new_label, status, url, pageid, response = query_wikipedia_title(site, tag.label)
-
- if verbosity >= 2 :
- print "response from query to %s with parameters %s :" % (site.apibase, repr(new_label))
- print repr(response)
-
- if new_label is not None:
- tag.label = new_label
- if status is not None:
- tag.url_status = status
- if url is not None:
- tag.wikipedia_url = url
- if pageid is not None:
- tag.wikipedia_pageid = pageid
-
- tag.save()
-
-
-
-class Command(NoArgsCommand):
- '''
- query and update wikipedia for tag title.
- '''
- options = ''
- help = """query and update wikipedia for tag title."""
-
- option_list = NoArgsCommand.option_list + (
- make_option('--force',
- action='store_true',
- dest='force',
- default=False,
- help='force all tags to be updated, not only those not yet processed'),
- make_option('--random',
- action='store_true',
- dest='random',
- default=False,
- help='randomize query on tags'),
- make_option('--site',
- action='store',
- type='string',
- dest='site_url',
- default="http://fr.wikipedia.org/w/api.php",
- help='the url for the wikipedia site'),
- make_option('--limit',
- action='store',
- type='int',
- dest='limit',
- default= -1,
- help='number of tag to process'),
- make_option('--start',
- action='store',
- type='int',
- dest='start',
- default=0,
- help='number of tag to ignore'),
- )
-
- def __is_homonymie(self, page_dict):
- for cat in page_dict.get(u"categories", []):
- if u'Catégorie:Homonymie' in cat.get(u"title", u"") or u'Category:Disambiguation pages' in cat.get(u"title", u""):
- return True
- return False
-
-
- def process_wp_response(self, label, response):
-
-
- query_dict = response['query']
- # get page if multiple pages or none -> return Tag.null_result
- pages = query_dict.get("pages", {})
- if len(pages) > 1 or len(pages) == 0:
- return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
-
- page = pages.values()[0]
-
- if u"invalid" in page or u"missing" in page:
- return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
-
- url = page.get(u'fullurl', None)
- pageid = page.get(u'pageid', None)
- new_label = page[u'title']
-
- if self.__is_homonymie(page):
- status = Tag.TAG_URL_STATUS_DICT["homonyme"]
- elif u"redirect" in page:
- status = Tag.TAG_URL_STATUS_DICT["redirection"]
- else:
- status = Tag.TAG_URL_STATUS_DICT["match"]
-
- return new_label, status, url, pageid
-
- def show_progress(self, current_line, total_line, label, width):
-
- percent = (float(current_line) / float(total_line)) * 100.0
-
- marks = math.floor(width * (percent / 100.0))
- spaces = math.floor(width - marks)
-
- loader = u'[' + (u'=' * int(marks)) + (u' ' * int(spaces)) + u']'
-
- sys.stdout.write(u"%s %d%% %d/%d - %s\r" % (loader, percent, current_line - 1, total_line - 1, repr(label))) #takes the header into account
- if percent >= 100:
- sys.stdout.write("\n")
- sys.stdout.flush()
-
- def handle_noargs(self, **options):
-
- self.style = no_style()
-
- interactive = options.get('interactive', True)
-
- verbosity = int(options.get('verbosity', '1'))
-
- force = options.get('force', False)
-
- limit = options.get("limit", -1)
- start = options.get("start", 0)
-
- site_url = options.get('site_url', settings.WIKIPEDIA_API_URL)
-
- random = options.get('random', False)
-
- if verbosity > 2:
- print "option passed : " + repr(options)
-
- if force and interactive:
- confirm = raw_input("""You have requested to query and replace the wikipedia information for all datasheets.
-Are you sure you want to do this ?
- Type 'yes' to continue, or 'no' to cancel: """)
- else:
- confirm = 'yes'
-
- if confirm != "yes":
- print "wikipedia query cancelled"
- return
-
- if force:
- queryset = Tag.objects.all()
- else:
- queryset = Tag.objects.filter(url_status=None)
-
- if random:
- queryset = queryset.order_by("?")
- else:
- queryset = queryset.order_by("label")
-
- if limit >= 0:
- queryset = queryset[start:limit]
- else:
- queryset = queryset[start:]
-
-
- if verbosity > 2 :
- print "Tag Query is %s" % (queryset.query)
-
- site = wiki.Wiki(site_url) #@UndefinedVariable
-
-
- count = queryset.count()
- if verbosity > 1:
- print "Processing %d tags" % (count)
-
-
-
- for i, tag in enumerate(queryset):
-
- if verbosity > 1:
- print "processing tag %s (%d/%d)" % (tag.label, i + 1, count)
- else:
- self.show_progress(i + 1, count, tag.label, 60)
-
- process_tag(site, tag, verbosity)
-
-
--- a/web/hdabo/models.py Mon Jun 20 15:49:22 2011 +0200
+++ b/web/hdabo/models.py Wed Jun 22 01:00:47 2011 +0200
@@ -1,278 +1,280 @@
-# -*- coding: utf-8 -*-
-
-from django.contrib.auth.models import User
-from django.db import models
-from hdabo.fields import SortedManyToManyField
-from hdabo.utils import Property
-import datetime
-
-class Organisation(models.Model):
- hda_id = models.CharField(max_length=512, unique=True, blank=False, null=False)
- name = models.CharField(max_length=512, unique=False, blank=False, null=False)
- location = models.CharField(max_length=512, unique=False, blank=True, null=True)
- website = models.CharField(max_length=2048, unique=False, blank=True, null=True)
-
-
-class Author(models.Model):
- hda_id = models.CharField(max_length=512, unique=True, blank=False, null=False)
- lastname = models.CharField(max_length=512, unique=False, blank=True, null=True)
- firstname = models.CharField(max_length=512, unique=False, blank=True, null=True)
-
-class TimePeriod(models.Model):
- TIME_PERIOD_CHOICES = (
- (1, u'Primaire'),
- (2, u'Collège'),
- (3, u'Lycée'),
- )
- TIME_PERIOD_DICT = {
- u'Primaire': 1,
- u'Collège': 2,
- u'Lycée': 3,
- }
- label = models.CharField(max_length=512, unique=False, blank=False, null=False)
- school_period = models.IntegerField(choices=TIME_PERIOD_CHOICES)
-
- class Meta:
- unique_together = ("label", "school_period")
-
- def __unicode__(self):
- return unicode(self.label)
-
-
-class Domain(models.Model):
- DOMAIN_PERIOD_CHOICES = (
- (0, u'Global'),
- (1, u'Primaire'),
- (2, u'Collège'),
- (3, u'Lycée'),
- )
- DOMAIN_PERIOD_DICT = {
- u'Global': 0,
- u'Primaire': 1,
- u'Collège': 2,
- u'Lycée': 3,
- }
- label = models.CharField(max_length=512, unique=False, blank=False, null=False)
- school_period = models.IntegerField(choices=DOMAIN_PERIOD_CHOICES)
-
- class Meta:
- unique_together = ("label", "school_period")
-
- def __unicode__(self):
- return unicode(self.label)
-
-
-class DocumentFormat(models.Model):
- label = models.CharField(max_length=512, unique=True, blank=False, null=False)
-
- def __unicode__(self):
- return unicode(self.label)
-
-class TagCategory(models.Model):
- label = models.CharField(max_length=512, unique=True, blank=False, null=False)
-
- def __unicode__(self):
- return unicode(self.label)
-
- class Meta:
- verbose_name_plural = "TagCategories"
-
-class Tag(models.Model):
- TAG_URL_STATUS_CHOICES = (
- (0, "null_result"),
- (1, "redirection"),
- (2, "homonyme"),
- (3, "match"),
- )
-
- TAG_URL_STATUS_DICT = {
- "null_result":0,
- "redirection":1,
- "homonyme":2,
- "match":3,
- }
-
- label = models.CharField(max_length=1024, unique=False, blank=False, null=False)
- original_label = models.CharField(max_length=1024, unique=True, blank=False, null=False, editable=False)
- alias = models.CharField(max_length=1024, unique=False, blank=True, null=True)
- category = models.ForeignKey(TagCategory, null=True, blank=True)
- wikipedia_url = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True)
- wikipedia_pageid = models.BigIntegerField(unique=False, blank=True, null=True)
- url_status = models.IntegerField(choices=TAG_URL_STATUS_CHOICES, blank=True, null=True, default=None)
- dbpedia_uri = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True)
-
- @Property
- def url_status_text(): #@NoSelf
- def fget(self):
- return self.TAG_URL_STATUS_CHOICES[self.url_status][1]
-
- return locals()
-
-class Location(models.Model):
- name = models.CharField(max_length=512, unique=False, blank=False, null=False)
- insee = models.CharField(max_length=5, unique=True, blank=False, null=False)
-
- def __unicode__(self):
- return unicode("%s : %s" % (self.name, self.insee))
-
-class Datasheet(models.Model):
- hda_id = models.CharField(max_length=512, unique=True, blank=False, null=False)
- author = models.ForeignKey(Author, null=True, blank=True)
- organisation = models.ForeignKey(Organisation)
- title = models.CharField(max_length=2048, unique=False, blank=False, null=False)
- description = models.TextField(blank=True, null=True)
- url = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True)
- domains = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Global']}, related_name="datasheets")
- primary_periods = SortedManyToManyField(TimePeriod, limit_choices_to={'school_period':TimePeriod.TIME_PERIOD_DICT[u'Primaire']}, related_name="primary_periods_datasheets")
- college_periods = SortedManyToManyField(TimePeriod, limit_choices_to={'school_period':TimePeriod.TIME_PERIOD_DICT[u'Collège']}, related_name="college_periods_datasheets")
- highschool_periods = SortedManyToManyField(TimePeriod, limit_choices_to={'school_period':TimePeriod.TIME_PERIOD_DICT[u'Lycée']}, related_name="highschool_periods_datasheets")
- primary_themes = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Primaire']}, related_name="primary_themes_datasheets")
- college_themes = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Collège']}, related_name="college_themes_datasheets")
- highschool_themes = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Lycée']}, related_name="highschool_themes_datasheets")
- town = models.ForeignKey(Location, null=True, blank=True)
- format = models.ForeignKey(DocumentFormat, null=True, blank=True)
- original_creation_date = models.DateField()
- original_modification_date = models.DateField()
- modification_datetime = models.DateTimeField(auto_now=True)
- validation_date = models.DateTimeField(null=True, blank=True)
- validated = models.BooleanField(default=False)
- validator = models.ForeignKey(User, null=True, blank=True)
- tags = models.ManyToManyField(Tag, through='TaggedSheet')
-
-
- def validate(self, user):
- self.validation_date = datetime.datetime.now()
- self.validated = True
- self.validator = user
- self.save()
-
- def unvalidate(self):
- self.validation_date = datetime.datetime.min
- self.validated = False
- self.validator = None
- self.save()
-
- @Property
- def domains_list(): #@NoSelf
- def fget(self):
- return [d.label for d in self.domains.all()]
-
- return locals()
-
- @Property
- def domains_text(): #@NoSelf
- def fget(self):
- return "; ".join(self.domains_list)
-
- return locals()
-
- @Property
- def primary_periods_list(): #@NoSelf
- def fget(self):
- return [d.label for d in self.primary_periods.all()]
-
- return locals()
-
-
- @Property
- def primary_periods_text(): #@NoSelf
- def fget(self):
- return "; ".join(self.primary_periods_list)
-
- return locals()
-
- @Property
- def college_periods_list(): #@NoSelf
- def fget(self):
- return [d.label for d in self.college_periods.all()]
-
- return locals()
-
- @Property
- def college_periods_text(): #@NoSelf
- def fget(self):
- return "; ".join(self.college_periods_list)
-
- return locals()
-
- @Property
- def highschool_periods_list(): #@NoSelf
- def fget(self):
- return [d.label for d in self.highschool_periods.all()]
-
- return locals()
-
- @Property
- def highschool_periods_text(): #@NoSelf
- def fget(self):
- return "; ".join(self.highschool_periods_list)
-
- return locals()
-
-
- @Property
- def primary_themes_list(): #@NoSelf
- def fget(self):
- return [d.label for d in self.primary_themes.all()]
-
- return locals()
-
-
- @Property
- def primary_themes_text(): #@NoSelf
- def fget(self):
- return "; ".join(self.primary_themes_list)
-
- return locals()
-
- @Property
- def college_themes_list(): #@NoSelf
- def fget(self):
- return [d.label for d in self.college_themes.all()]
-
- return locals()
-
- @Property
- def college_themes_text(): #@NoSelf
- def fget(self):
- return "; ".join(self.college_themes_list)
-
- return locals()
-
- @Property
- def highschool_themes_list(): #@NoSelf
- def fget(self):
- return [d.label for d in self.highschool_themes.all()]
-
- return locals()
-
- @Property
- def highschool_themes_text(): #@NoSelf
- def fget(self):
- return "; ".join(self.highschool_themes_list)
-
- return locals()
-
- @Property
- def town_text(): #@NoSelf
- def fget(self):
- return self.town.name if self.town else ""
-
- return locals()
-
- @Property
- def tags_text(): #@NoSelf
- def fget(self):
- return "; ".join([t.label for t in self.tags.all()])
-
- return locals()
-
-
-class TaggedSheet(models.Model):
- datasheet = models.ForeignKey(Datasheet)
- tag = models.ForeignKey(Tag)
- original_order = models.IntegerField(default=0)
- order = models.IntegerField(default=0)
- index_note = models.FloatField(default=0.0)
-
-
+# -*- coding: utf-8 -*-
+
+from django.contrib.auth.models import User
+from django.db import models
+from hdabo.fields import SortedManyToManyField
+from hdabo.utils import Property
+import datetime
+
+class Organisation(models.Model):
+ hda_id = models.CharField(max_length=512, unique=True, blank=False, null=False)
+ name = models.CharField(max_length=512, unique=False, blank=False, null=False)
+ location = models.CharField(max_length=512, unique=False, blank=True, null=True)
+ website = models.CharField(max_length=2048, unique=False, blank=True, null=True)
+
+
+class Author(models.Model):
+ hda_id = models.CharField(max_length=512, unique=True, blank=False, null=False)
+ lastname = models.CharField(max_length=512, unique=False, blank=True, null=True)
+ firstname = models.CharField(max_length=512, unique=False, blank=True, null=True)
+
+class TimePeriod(models.Model):
+ TIME_PERIOD_CHOICES = (
+ (1, u'Primaire'),
+ (2, u'Collège'),
+ (3, u'Lycée'),
+ )
+ TIME_PERIOD_DICT = {
+ u'Primaire': 1,
+ u'Collège': 2,
+ u'Lycée': 3,
+ }
+ label = models.CharField(max_length=512, unique=False, blank=False, null=False)
+ school_period = models.IntegerField(choices=TIME_PERIOD_CHOICES)
+
+ class Meta:
+ unique_together = ("label", "school_period")
+
+ def __unicode__(self):
+ return unicode(self.label)
+
+
+class Domain(models.Model):
+ DOMAIN_PERIOD_CHOICES = (
+ (0, u'Global'),
+ (1, u'Primaire'),
+ (2, u'Collège'),
+ (3, u'Lycée'),
+ )
+ DOMAIN_PERIOD_DICT = {
+ u'Global': 0,
+ u'Primaire': 1,
+ u'Collège': 2,
+ u'Lycée': 3,
+ }
+ label = models.CharField(max_length=512, unique=False, blank=False, null=False)
+ school_period = models.IntegerField(choices=DOMAIN_PERIOD_CHOICES)
+
+ class Meta:
+ unique_together = ("label", "school_period")
+
+ def __unicode__(self):
+ return unicode(self.label)
+
+
+class DocumentFormat(models.Model):
+ label = models.CharField(max_length=512, unique=True, blank=False, null=False)
+
+ def __unicode__(self):
+ return unicode(self.label)
+
+class TagCategory(models.Model):
+ label = models.CharField(max_length=512, unique=True, blank=False, null=False)
+
+ def __unicode__(self):
+ return unicode(self.label)
+
+ class Meta:
+ verbose_name_plural = "TagCategories"
+
+class Tag(models.Model):
+ TAG_URL_STATUS_CHOICES = (
+ (0, "null_result"),
+ (1, "redirection"),
+ (2, "homonyme"),
+ (3, "match"),
+ )
+
+ TAG_URL_STATUS_DICT = {
+ "null_result":0,
+ "redirection":1,
+ "homonyme":2,
+ "match":3,
+ }
+
+ label = models.CharField(max_length=1024, unique=False, blank=False, null=False, db_index=True)
+ original_label = models.CharField(max_length=1024, unique=True, blank=False, null=False, editable=False)
+ alias = models.CharField(max_length=1024, unique=False, blank=True, null=True)
+ category = models.ForeignKey(TagCategory, null=True, blank=True)
+ wikipedia_url = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True)
+ wikipedia_pageid = models.BigIntegerField(unique=False, blank=True, null=True)
+ url_status = models.IntegerField(choices=TAG_URL_STATUS_CHOICES, blank=True, null=True, default=None)
+ dbpedia_uri = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True)
+ popularity = models.IntegerField(blank=False, null=False, default=0, db_index=True)
+
+ @Property
+ def url_status_text(): #@NoSelf
+ def fget(self):
+ return self.TAG_URL_STATUS_CHOICES[self.url_status][1]
+
+ return locals()
+
+class Location(models.Model):
+ name = models.CharField(max_length=512, unique=False, blank=False, null=False)
+ insee = models.CharField(max_length=5, unique=True, blank=False, null=False)
+
+ def __unicode__(self):
+ return unicode("%s : %s" % (self.name, self.insee))
+
+class Datasheet(models.Model):
+ hda_id = models.CharField(max_length=512, unique=True, blank=False, null=False)
+ author = models.ForeignKey(Author, null=True, blank=True)
+ organisation = models.ForeignKey(Organisation)
+ title = models.CharField(max_length=2048, unique=False, blank=False, null=False)
+ description = models.TextField(blank=True, null=True)
+ url = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True)
+ domains = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Global']}, related_name="datasheets")
+ primary_periods = SortedManyToManyField(TimePeriod, limit_choices_to={'school_period':TimePeriod.TIME_PERIOD_DICT[u'Primaire']}, related_name="primary_periods_datasheets")
+ college_periods = SortedManyToManyField(TimePeriod, limit_choices_to={'school_period':TimePeriod.TIME_PERIOD_DICT[u'Collège']}, related_name="college_periods_datasheets")
+ highschool_periods = SortedManyToManyField(TimePeriod, limit_choices_to={'school_period':TimePeriod.TIME_PERIOD_DICT[u'Lycée']}, related_name="highschool_periods_datasheets")
+ primary_themes = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Primaire']}, related_name="primary_themes_datasheets")
+ college_themes = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Collège']}, related_name="college_themes_datasheets")
+ highschool_themes = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Lycée']}, related_name="highschool_themes_datasheets")
+ town = models.ForeignKey(Location, null=True, blank=True)
+ format = models.ForeignKey(DocumentFormat, null=True, blank=True)
+ original_creation_date = models.DateField()
+ original_modification_date = models.DateField()
+ modification_datetime = models.DateTimeField(auto_now=True)
+ validation_date = models.DateTimeField(null=True, blank=True)
+ validated = models.BooleanField(default=False, db_index=True)
+ validator = models.ForeignKey(User, null=True, blank=True)
+ manual_order = models.BooleanField(default=False, db_index=True)
+ tags = models.ManyToManyField(Tag, through='TaggedSheet')
+
+
+ def validate(self, user):
+ self.validation_date = datetime.datetime.now()
+ self.validated = True
+ self.validator = user
+ self.save()
+
+ def unvalidate(self):
+ self.validation_date = datetime.datetime.min
+ self.validated = False
+ self.validator = None
+ self.save()
+
+ @Property
+ def domains_list(): #@NoSelf
+ def fget(self):
+ return [d.label for d in self.domains.all()]
+
+ return locals()
+
+ @Property
+ def domains_text(): #@NoSelf
+ def fget(self):
+ return "; ".join(self.domains_list)
+
+ return locals()
+
+ @Property
+ def primary_periods_list(): #@NoSelf
+ def fget(self):
+ return [d.label for d in self.primary_periods.all()]
+
+ return locals()
+
+
+ @Property
+ def primary_periods_text(): #@NoSelf
+ def fget(self):
+ return "; ".join(self.primary_periods_list)
+
+ return locals()
+
+ @Property
+ def college_periods_list(): #@NoSelf
+ def fget(self):
+ return [d.label for d in self.college_periods.all()]
+
+ return locals()
+
+ @Property
+ def college_periods_text(): #@NoSelf
+ def fget(self):
+ return "; ".join(self.college_periods_list)
+
+ return locals()
+
+ @Property
+ def highschool_periods_list(): #@NoSelf
+ def fget(self):
+ return [d.label for d in self.highschool_periods.all()]
+
+ return locals()
+
+ @Property
+ def highschool_periods_text(): #@NoSelf
+ def fget(self):
+ return "; ".join(self.highschool_periods_list)
+
+ return locals()
+
+
+ @Property
+ def primary_themes_list(): #@NoSelf
+ def fget(self):
+ return [d.label for d in self.primary_themes.all()]
+
+ return locals()
+
+
+ @Property
+ def primary_themes_text(): #@NoSelf
+ def fget(self):
+ return "; ".join(self.primary_themes_list)
+
+ return locals()
+
+ @Property
+ def college_themes_list(): #@NoSelf
+ def fget(self):
+ return [d.label for d in self.college_themes.all()]
+
+ return locals()
+
+ @Property
+ def college_themes_text(): #@NoSelf
+ def fget(self):
+ return "; ".join(self.college_themes_list)
+
+ return locals()
+
+ @Property
+ def highschool_themes_list(): #@NoSelf
+ def fget(self):
+ return [d.label for d in self.highschool_themes.all()]
+
+ return locals()
+
+ @Property
+ def highschool_themes_text(): #@NoSelf
+ def fget(self):
+ return "; ".join(self.highschool_themes_list)
+
+ return locals()
+
+ @Property
+ def town_text(): #@NoSelf
+ def fget(self):
+ return self.town.name if self.town else ""
+
+ return locals()
+
+ @Property
+ def tags_text(): #@NoSelf
+ def fget(self):
+ return "; ".join([t.label for t in self.tags.all()])
+
+ return locals()
+
+
+class TaggedSheet(models.Model):
+ datasheet = models.ForeignKey(Datasheet)
+ tag = models.ForeignKey(Tag)
+ original_order = models.IntegerField(null=False, blank=False, default=0)
+ order = models.IntegerField(null=False, blank=False, default=0, db_index=True)
+ index_note = models.FloatField(null=False, blank=False, default=0.0, db_index=True)
+
+
--- a/web/hdabo/settings.py Mon Jun 20 15:49:22 2011 +0200
+++ b/web/hdabo/settings.py Wed Jun 22 01:00:47 2011 +0200
@@ -148,4 +148,6 @@
WIKIPEDIA_API_URL = "http://fr.wikipedia.org/w/api.php"
+DBPEDIA_URI_TEMPLATE = "http://dbpedia.org/resource/%s"
+
from hdabo.config import * #@UnusedWildImport
Binary file web/hdabo/static/hdabo/img/arrow_green_right.png has changed
--- a/web/hdabo/templates/partial/all_tags_table.html Mon Jun 20 15:49:22 2011 +0200
+++ b/web/hdabo/templates/partial/all_tags_table.html Wed Jun 22 01:00:47 2011 +0200
@@ -4,6 +4,7 @@
<th>label</th>
{% comment %}<th>original_label</th>{% endcomment %}
<th class="text_centered">Lien W</th>
+ <th class="text_centered">Lien D</th>
<th>Catégorie</th>
<th class="large_25 text_centered">Supprimer<br/>le lien W</th>
<th>Alias</th></tr>
@@ -17,7 +18,15 @@
<a href="{{tag.wikipedia_url}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/arrow_right.png" ></a>
{% else %}
<a href="http://fr.wikipedia.org/w/index.php?search={{tag.label}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/wikipedia_search.png" ></a>
- {% endif %}</td>
+ {% endif %}
+ </td>
+ <td class="text_centered">
+ {% if tag.dbpedia_uri and tag.dbpedia_uri != "" %}
+ <a href="{{tag.dbpedia_uri}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/arrow_green_right.png" ></a>
+ {% else %}
+
+ {% endif %}
+ </td>
<td class="tag_category" id="{{tag.id}}">{% if tag.category %}{{ tag.category }}{% endif %}</td>
<td class="text_centered"><img src="{{STATIC_URL}}hdabo/img/red_cross.png" class="remove_wp_link" id="{{tag.id}}" alt="{{tag.label}}" /></td>
<td class="tag_alias" id="{{tag.id}}" >{% if tag.alias %}{{tag.alias}}{% endif %}</td></tr>
--- a/web/hdabo/templates/partial/tag_table.html Mon Jun 20 15:49:22 2011 +0200
+++ b/web/hdabo/templates/partial/tag_table.html Wed Jun 22 01:00:47 2011 +0200
@@ -8,6 +8,7 @@
<th>label</th>
{% comment %}<th>original_label</th>{% endcomment %}
<th class="text_centered">Lien W</th>
+ <th class="text_centered">Lien D</th>
<th>Catégorie</th>
<th class="large_25 text_centered">Supprimer<br/>le lien W</th>
<th>Alias</th>
@@ -28,7 +29,15 @@
<a href="{{t.tag.wikipedia_url}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/arrow_right.png" ></a>
{% else %}
<a href="http://fr.wikipedia.org/w/index.php?search={{t.tag.label}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/wikipedia_search.png" ></a>
- {% endif %}</td>
+ {% endif %}
+ </td>
+ <td class="text_centered">
+ {% if tag.dbpedia_uri and tag.dbpedia_uri != "" %}
+ <a href="{{tag.dbpedia_uri}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/arrow_green_right.png" ></a>
+ {% else %}
+
+ {% endif %}
+ </td>
<td>{% if t.tag.category %}{{ t.tag.category }}{% endif %}</td>
<td class="text_centered"><img src="{{STATIC_URL}}hdabo/img/red_cross.png" class="remove_wp_link" id="{{t.tag.id}}" alt="{{t.tag.label}}" /></td>
<td>{% if t.tag.alias %}{{t.tag.alias}}{% endif %}</td>
@@ -50,7 +59,15 @@
<a href="{{t.wikipedia_url}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/arrow_right.png" ></a>
{% else %}
<a href="http://fr.wikipedia.org/w/index.php?search={{t.label}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/wikipedia_search.png" ></a>
- {% endif %}</td>
+ {% endif %}
+ </td>
+ <td class="text_centered">
+ {% if tag.dbpedia_uri and tag.dbpedia_uri != "" %}
+ <a href="{{tag.dbpedia_uri}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/arrow_green_right.png" ></a>
+ {% else %}
+
+ {% endif %}
+ </td>
<td>{% if t.category %}{{ t.category }}{% endif %}</td>
<td class="text_centered"><img src="{{STATIC_URL}}hdabo/img/red_cross.png" class="remove_wp_link" id="{{t.id}}" /></td>
<td>{{t.alias}}</td>
--- a/web/hdabo/views.py Mon Jun 20 15:49:22 2011 +0200
+++ b/web/hdabo/views.py Wed Jun 22 01:00:47 2011 +0200
@@ -4,15 +4,15 @@
from django.contrib.auth.decorators import login_required #@UnusedImport
from django.core.paginator import Paginator
from django.db.models import Max
-from django.http import HttpResponse, HttpResponseBadRequest
+from django.http import HttpResponseBadRequest
from django.shortcuts import render_to_response, redirect
from django.template import RequestContext
from haystack.constants import DJANGO_ID
from haystack.query import SearchQuerySet
-from hdabo.management.commands.querywikipedia import process_tag
-from hdabo.wp_utils import (normalize_tag, query_wikipedia_title,
+from hdabo.wp_utils import process_tag
+from hdabo.utils import OrderedDict
+from hdabo.wp_utils import (normalize_tag, query_wikipedia_title,
get_or_create_tag)
-from hdabo.utils import OrderedDict
from models import Datasheet, Organisation, Tag, TagCategory, TaggedSheet
from wikitools import wiki
import django.utils.simplejson as json
@@ -22,7 +22,7 @@
#@login_required
def home(request):
- # Get all organisations
+ # Get all organizations
orgas = Organisation.objects.all().order_by('name')
# Count all validated, unvalidated sheets for each organisation
org_list = []
@@ -132,7 +132,6 @@
# NB : it is different from the TagSheet.order in the database.
new_order = int(request.POST["new_order"]) - 1
old_order = int(request.POST["old_order"]) - 1
- s = "new_order = " + str(new_order) + ", old_order = " + str(old_order)
# First we get the datasheet's TaggedSheets (list to force evaluation)
ordered_tags = list(TaggedSheet.objects.filter(datasheet=Datasheet.objects.get(id=ds_id)).order_by('order'))
# We change the moved TaggedSheets's order
@@ -143,16 +142,19 @@
# We move the TaggedSheets's order
if new_order > old_order :
# And we decrease the other ones
- for i in range(old_order+1,new_order+1) :
+ for i in range(old_order + 1, new_order + 1) :
ts = ordered_tags[i]
ts.order = ts.order - 1
ts.save()
else :
# And we increase the other ones
- for i in range(new_order,old_order) :
+ for i in range(new_order, old_order) :
ts = ordered_tags[i]
ts.order = ts.order + 1
ts.save()
+ ds = Datasheet.objects.get(id=ds_id)
+ ds.manual_order = True
+ ds.save()
return get_tag_table(request=request, ds_id=ds_id, valid=0)
@@ -205,6 +207,10 @@
ts = ds_tags.filter(tag=Tag.objects.filter(id=tag_id))[0]
ts.delete()
+ ds = Datasheet.objects.get(id=ds_id)
+ ds.manual_order = True
+ ds.save()
+
return get_tag_table(request=request, ds_id=ds_id, valid=0)
@@ -218,23 +224,18 @@
if tag.label != tag_label:
-
tag.label = tag_label
site = wiki.Wiki(settings.WIKIPEDIA_API_URL) #@UndefinedVariable
- new_label, status, url, pageid, response = query_wikipedia_title(site, tag_label) #@UnusedVariable
+ wp_res = query_wikipedia_title(site, tag_label)
+ status, url, pageid, dbpedia_uri = (wp_res['status'], wp_res['wikipedia_url'], wp_res['page_id'], wp_res["dbpedia_uri"])
if status is not None:
tag.url_status = status
- if url is not None:
- tag.wikipedia_url = url
- else:
- tag.wikipedia_url = None
-
- if pageid is not None:
- tag.wikipedia_pageid = pageid
- else:
- tag.wikipedia_pageid = None
+
+ tag.wikipedia_url = url
+ tag.wikipedia_pageid = pageid
+ tag.dbpedia_uri = dbpedia_uri
tag.save()
@@ -246,7 +247,7 @@
tag_id = request.POST["id"]
tag_label = request.POST["value"]
- ds_id=request.POST["datasheet_id"]
+ ds_id = request.POST["datasheet_id"]
tag = Tag.objects.get(id=tag_id)
@@ -268,6 +269,8 @@
ts.save()
+ ds.manual_order = True
+ ds.save()
return get_tag_table(request=request, ds_id=ds_id, valid=0)
@@ -313,10 +316,12 @@
# if the tag is created or if the tag is not in the list
list_ts = TaggedSheet.objects.filter(datasheet=ds)
- if created or len(list_ts.filter(tag=tag))==0 :
+ if created or len(list_ts.filter(tag=tag)) == 0 :
new_order = list_ts.aggregate(Max('order'))['order__max'] + 1
ts = TaggedSheet.objects.create(datasheet=ds, tag=tag, original_order=new_order, order=new_order)
ts.save()
+ ds.manual_order = True
+ ds.save()
return get_tag_table(request=request, ds_id=ds_id, valid=0)
@@ -347,9 +352,15 @@
else :
valid = False
# We validate or unvalidate the requester datasheet
+
+ if request.user.is_authenticated():
+ user = request.user
+ else:
+ user = None
+
ds = Datasheet.objects.get(id=ds_id)
if valid :
- ds.validate(None)
+ ds.validate(user)
else :
ds.unvalidate()
ds.save()
@@ -361,7 +372,7 @@
else :
# We ask to display the validated ds
valid_req = 1
- if len(same_organisation_ds)>0 :
+ if len(same_organisation_ds) > 0 :
return redirect('list_for_orga', orga_id=ds.organisation.id, valid=valid_req)
else :
return redirect('home')
@@ -407,4 +418,4 @@
# This function is available only in all_tags_table context
return get_all_tags_table(request=request, num_page=request.POST["num_page"], nb_by_page=request.POST["nb_by_page"])
-
\ No newline at end of file
+
--- a/web/hdabo/wp_utils.py Mon Jun 20 15:49:22 2011 +0200
+++ b/web/hdabo/wp_utils.py Wed Jun 22 01:00:47 2011 +0200
@@ -2,6 +2,20 @@
from django.conf import settings
from hdabo.models import Tag
from wikitools import api, wiki
+from django.utils.http import urlquote
+
+def normalize_tag(tag):
+ if len(tag) == 0:
+ return tag
+ tag = tag.strip()
+ tag = tag.replace("_", " ")
+ tag = " ".join(tag.split())
+ tag = tag[0].upper() + tag[1:]
+ return tag
+
+def urlize_for_wkipedia(label):
+ return urlquote(label.replace(" ","_"))
+
def __is_homonymie(page_dict):
for cat in page_dict.get(u"categories", []):
@@ -11,21 +25,22 @@
def query_wikipedia_title(site, label):
- params = {'action':'query', 'titles': label, 'prop':'info|categories', 'inprop':'url'}
+ params = {'action':'query', 'titles': label, 'prop':'info|categories|langlinks', 'inprop':'url', 'lllimit':'500', 'cllimit':'500'}
wpquery = api.APIRequest(site, params) #@UndefinedVariable
- response = wpquery.query()
+ response = wpquery.query()
+ original_response = response
query_dict = response['query']
# get page if multiple pages or none -> return Tag.null_result
pages = query_dict.get("pages", {})
if len(pages) > 1 or len(pages) == 0:
- return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None, response
+ return { 'new_label': None, 'status': Tag.TAG_URL_STATUS_DICT["null_result"], 'wikipedia_url': None, 'pageid': None, 'dbpedia_uri': None, 'response': response }
page = pages.values()[0]
if u"invalid" in page or u"missing" in page:
- return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None, response
+ return { 'new_label': None, 'status': Tag.TAG_URL_STATUS_DICT["null_result"], 'wikipedia_url': None, 'pageid': None, 'dbpedia_uri': None, 'response': response }
url = page.get(u'fullurl', None)
pageid = page.get(u'pageid', None)
@@ -37,17 +52,37 @@
status = Tag.TAG_URL_STATUS_DICT["redirection"]
else:
status = Tag.TAG_URL_STATUS_DICT["match"]
-
- return new_label, status, url, pageid, response
+
+ if status == Tag.TAG_URL_STATUS_DICT["redirection"]:
+ params = {'action':'query', 'titles': label, 'prop':'info|categories|langlinks', 'inprop':'url', 'lllimit':'500', 'cllimit':'500', 'redirects':True}
+ wpquery = api.APIRequest(site, params) #@UndefinedVariable
+ response = wpquery.query()
+ query_dict = response['query']
+ pages = query_dict.get("pages", {})
+            # the initial query already returned at least one page, so an
+            # empty/multiple result here is unexpected and treated as null_result
+ if len(pages) > 1 or len(pages) == 0:
+ return { 'new_label': None, 'status': Tag.TAG_URL_STATUS_DICT["null_result"], 'wikipedia_url': None, 'pageid': None, 'dbpedia_uri': None, 'response': response }
+ page = pages.values()[0]
+
-def normalize_tag(tag):
- if len(tag) == 0:
- return tag
- tag = tag.strip()
- tag = tag.replace("_", " ")
- tag = " ".join(tag.split())
- tag = tag[0].upper() + tag[1:]
- return tag
+
+    # scan the langlinks of the page to extract the English label (used for the DBpedia URI)
+ english_label = None
+
+ if status == Tag.TAG_URL_STATUS_DICT['match'] or status == Tag.TAG_URL_STATUS_DICT['redirection']:
+ lang_links = page.get('langlinks', [])
+ for lang_info_dict in lang_links:
+ if lang_info_dict['lang'] == "en":
+ english_label = lang_info_dict["*"]
+ break
+
+ if english_label and "#" not in english_label:
+ dbpedia_uri = settings.DBPEDIA_URI_TEMPLATE % (urlize_for_wkipedia(english_label))
+ else:
+ dbpedia_uri = None
+
+ return { 'new_label': new_label, 'status': status, 'wikipedia_url': url, 'pageid': pageid, 'dbpedia_uri': dbpedia_uri, 'response': original_response }
+
def get_or_create_tag(tag_label):
@@ -60,25 +95,41 @@
if created:
site = wiki.Wiki(settings.WIKIPEDIA_API_URL) #@UndefinedVariable
- new_label, status, url, pageid, response = query_wikipedia_title(site, tag_label_normalized) #@UnusedVariable
+ wp_res = query_wikipedia_title(site, tag_label_normalized) #@UnusedVariable
+ new_label, status, url, pageid, dbpedia_uri = wp_res['new_label'], wp_res['status'], wp_res['wikipedia_url'], wp_res['pageid'], wp_res["dbpedia_uri"]
+
# We save the datas
if new_label is not None:
tag.label = new_label
if status is not None:
tag.url_status = status
- if url is not None:
- tag.wikipedia_url = url
- else:
- tag.wikipedia_url = None
-
- if pageid is not None:
- tag.wikipedia_pageid = pageid
- else:
- tag.wikipedia_pageid = None
+ tag.wikipedia_url = url
+ tag.wikipedia_pageid = pageid
+ tag.dbpedia_uri = dbpedia_uri
tag.save()
return tag, created
+def process_tag(site, tag, verbosity):
+ wp_res = query_wikipedia_title(site, tag.label)
+ new_label, status, url, pageid, response, dbpedia_uri = wp_res['new_label'], wp_res['status'], wp_res['wikipedia_url'], wp_res['pageid'], wp_res['response'], wp_res["dbpedia_uri"]
+
+ if verbosity >= 2 :
+ print "response from query to %s with parameters %s :" % (site.apibase, repr(new_label))
+ print repr(response)
+
+ if new_label is not None:
+ tag.label = new_label
+ if status is not None:
+ tag.url_status = status
+ tag.wikipedia_url = url
+ tag.wikipedia_pageid = pageid
+ tag.dbpedia_uri = dbpedia_uri
+
+ tag.save()
+
+
+