- add popularity
- calculate dbpedia_uri
- display dbpedia uri
- add manual_order
- various corrections
--- a/.settings/org.eclipse.core.resources.prefs Mon Jun 20 15:49:22 2011 +0200
+++ b/.settings/org.eclipse.core.resources.prefs Wed Jun 22 01:00:47 2011 +0200
@@ -1,4 +1,4 @@
-#Fri Jun 17 01:31:06 CEST 2011
+#Fri Jun 17 17:33:03 CEST 2011
eclipse.preferences.version=1
encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/haystack/backends/__init__.py=utf-8
encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/fields.py=utf-8
@@ -8,8 +8,9 @@
encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/wiki.py=utf-8
encoding//web/hdabo/fields.py=utf-8
encoding//web/hdabo/forms.py=utf-8
-encoding//web/hdabo/management/commands/importcsv.py=utf-8
-encoding//web/hdabo/management/commands/querywikipedia.py=utf-8
+encoding//web/hdabo/management/commands/import_csv.py=utf-8
+encoding//web/hdabo/management/commands/import_tag_popularity.py=utf-8
+encoding//web/hdabo/management/commands/query_wikipedia.py=utf-8
encoding//web/hdabo/models.py=utf-8
encoding//web/hdabo/search/french_whoosh_backend.py=utf-8
encoding//web/hdabo/tests/models.py=utf-8
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sql/create_db.sql Wed Jun 22 01:00:47 2011 +0200
@@ -0,0 +1,7 @@
+CREATE DATABASE hdabo
+ WITH ENCODING='UTF8'
+ OWNER=iri
+ TEMPLATE=template0
+ LC_COLLATE='fr_FR.UTF-8'
+ LC_CTYPE='fr_FR.UTF-8'
+ CONNECTION LIMIT=-1;
\ No newline at end of file
Binary file web/hdabo/fixtures/datasheet_10.yaml.bz2 has changed
Binary file web/hdabo/fixtures/datasheet_347.yaml.bz2 has changed
Binary file web/hdabo/fixtures/initial_data.yaml.bz2 has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdabo/management/commands/import_csv.py Wed Jun 22 01:00:47 2011 +0200
@@ -0,0 +1,257 @@
+# -*- coding: utf-8 -*-
+'''
+Created on May 25, 2011
+
+@author: ymh
+'''
+#Auteur,Chemin,Comment,Controle,Datcre,Datmaj,Desc,Domaine,Format,ID,Insee,Org,Org_Home,OrgID,Periode1,Periode2,Periode3,Satut,Sousdom,Tag,Theme2,Theme3,Titre,Url,Vignette,Ville
+#"Auteur","Chemin","Comment","Controle","Datcre","Datmaj","Desc","Domaine","Format","ID","Insee","Org","Org_Home","OrgID","Periode1","Periode2","Periode3","Satut","Sousdom","Tag","Theme2","Theme3","Titre","Url","Vignette","Ville",
+
+from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+from hdabo.models import (Author, Datasheet, DocumentFormat, Domain, Organisation,
+ Tag, TaggedSheet, TimePeriod, Location)
+from hdabo.wp_utils import normalize_tag
+from optparse import make_option
+import csv
+import datetime
+import math
+import sys
+
+class Command(BaseCommand):
+ '''
+ Command to import csvfile
+ '''
+ args = '<path_to_csv_file path_to_csv_file ...>'
+ options = '[--ignore-existing] [--lines] [--encoding]'
+ help = """Import of a csv file for hdabo
+Options:
+ --ignore-existing : ignore existing datasheets
+ --lines : max number of lines to load (for each file). 0 means all.
+ --encoding : files encoding. default to latin-1"""
+
+ option_list = BaseCommand.option_list + (
+ make_option('--encoding',
+ action='store',
+ type='string',
+ dest='encoding',
+ default="latin-1",
+ help='fix the file encoding. default to latin-1'),
+ make_option('--delimiter',
+ action='store',
+ type='string',
+ dest='delimiter',
+ default=";",
+ help='csv file delimiter'),
+ make_option('--dialect',
+ action='store',
+ type='string',
+ dest='dialect',
+ default="excel",
+ help='csv dialect'),
+ make_option('--fieldnames',
+ action='store',
+ type='string',
+ dest='fieldnames',
+ default=None,
+ help='fields list (comma separated)'),
+ make_option('--lines',
+ action='store',
+ type='int',
+ dest='lines',
+ default=0,
+ help='Number of lines to read. 0 means all.'),
+ make_option('--ignore-existing',
+ action='store_true',
+ dest='ignore_existing',
+ default=False,
+ help='force insertion'),
+
+ )
+
+ def show_progress(self, current_line, total_line, width):
+
+ percent = (float(current_line) / float(total_line)) * 100.0
+
+ marks = math.floor(width * (percent / 100.0))
+ spaces = math.floor(width - marks)
+
+ loader = '[' + ('=' * int(marks)) + (' ' * int(spaces)) + ']'
+
+ sys.stdout.write("%s %d%% %d/%d\r" % (loader, percent, current_line - 1, total_line - 1)) #takes the header into account
+ if percent >= 100:
+ sys.stdout.write("\n")
+ sys.stdout.flush()
+
+
+ def create_domain_period(self, row_value, klass, school_period):
+ res_list = []
+ if not row_value:
+ return res_list
+ for label_str in [dstr.strip() for dstr in row_value.split('\x0b')]:
+ if label_str:
+ res_obj, created = klass.objects.get_or_create(label=label_str, school_period=school_period, defaults={"label":label_str, "school_period":school_period}) #@UnusedVariable
+ res_list.append(res_obj)
+ return res_list
+
+ def create_datasheet(self, row):
+
+ if self.ignore_existing and Datasheet.objects.filter(hda_id=row[u"ID"]).count() > 0:
+ return
+
+ author_str = row[u'Auteur']
+ if author_str:
+ author_array = author_str.split(" ")
+ if len(author_array) == 0:
+ firstname = ""
+ lastname = ""
+ elif len(author_array) == 1:
+ firstname = ""
+ lastname = author_array[0]
+ elif len(author_array) == 2:
+ firstname = author_array[0]
+ lastname = author_array[1]
+
+ author, created = Author.objects.get_or_create(hda_id=author_str, defaults={"firstname":firstname, "lastname":lastname}) #@UnusedVariable
+ else:
+ author = None
+
+ org_str = row[u"Org"]
+ if org_str:
+ url_str = row[u'Org_Home']
+ if url_str is not None:
+ url_str = url_str.strip()
+ org, created = Organisation.objects.get_or_create(hda_id=org_str, defaults={"name":org_str, "website" : url_str}) #@UnusedVariable
+ else:
+ org = None
+
+ town_str = row[u"Ville"]
+ if town_str:
+ insee_str = row[u'Insee'].strip() if row[u'Insee'] else row[u'Insee']
+ if len(insee_str) > 5:
+ insee_str = ""
+ loc, created = Location.objects.get_or_create(insee=insee_str, defaults={"name": town_str, "insee": insee_str}) #@UnusedVariable
+ else:
+ loc = None
+
+ format_str = row[u"Format"]
+ if format_str:
+ format, created = DocumentFormat.objects.get_or_create(label=format_str, defaults={"label": format_str}) #@UnusedVariable
+ else:
+ format = None
+
+ domains = self.create_domain_period(row[u"Domaine"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Global'])
+
+ primary_periods = self.create_domain_period(row[u"Periode1"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Primaire'])
+ college_periods = self.create_domain_period(row[u"Periode2"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Collège'])
+ highschool_periods = self.create_domain_period(row[u"Periode3"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Lycée'])
+
+ primary_themes = self.create_domain_period(row[u"Sousdom"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Primaire'])
+ college_themes = self.create_domain_period(row[u"Theme2"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Collège'])
+ highschool_themes = self.create_domain_period(row[u"Theme3"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Lycée'])
+
+ url = row[u"Url"]
+ if url is not None:
+ url = url.strip()
+
+ datasheet = Datasheet.objects.create(
+ hda_id=row[u"ID"],
+ author=author,
+ organisation=org,
+ title=row[u"Titre"],
+ description=row[u"Desc"],
+ url=url,
+ town=loc,
+ format=format,
+ original_creation_date=datetime.datetime.strptime(row[u"Datcre"], "%d/%m/%Y").date(),
+ original_modification_date=datetime.datetime.strptime(row[u"Datmaj"], "%d/%m/%Y").date(),
+ validated=False
+ )
+
+ datasheet.save()
+
+ datasheet.domains = domains
+ datasheet.primary_periods = primary_periods
+ datasheet.college_periods = college_periods
+ datasheet.highschool_periods = highschool_periods
+ datasheet.primary_themes = primary_themes
+ datasheet.college_themes = college_themes
+ datasheet.highschool_themes = highschool_themes
+
+
+ if row[u'Tag']:
+ for i, tag in enumerate([t.strip() for t in row[u'Tag'].split(u";")]):
+ if len(tag) == 0:
+ continue
+ tag_label = normalize_tag(tag)
+ tag_obj, created = Tag.objects.get_or_create(label__iexact=tag_label, defaults={'label':tag_label, 'original_label':tag}) #@UnusedVariable
+ tagged_ds = TaggedSheet(datasheet=datasheet, tag=tag_obj, original_order=i + 1, order=i + 1)
+ tagged_ds.save()
+
+
+ def handle(self, *args, **options):
+
+ if len(args) == 0:
+            raise CommandError("Give at least one csv file to import")
+
+ self.encoding = options.get('encoding', "latin-1")
+ lines = options.get('lines', 0)
+ self.ignore_existing = options.get('ignore_existing', False)
+ fieldnames = options.get('fieldnames', None)
+
+ transaction.commit_unless_managed()
+ transaction.enter_transaction_management()
+ transaction.managed(True)
+
+ try:
+ for csv_path in args:
+ try:
+ print "Processing %s " % (csv_path)
+ with open(csv_path, 'rU') as csv_file:
+
+ # get the number of lines if necessary
+ if not lines:
+ for i, l in enumerate(csv_file): #@UnusedVariable
+ pass
+ total_line = i + 1
+ if fieldnames:
+ total_line = total_line + 1
+ csv_file.seek(0)
+ else:
+ total_line = lines + 1
+
+ delimiter = options.get('delimiter', ";")
+ if delimiter == "TAB" or delimiter == "\\t":
+ delimiter = '\t'
+
+ dr_kwargs = {'delimiter':delimiter}
+ if fieldnames is not None:
+ dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
+ dialect = options.get('dialect', "excel")
+ if dialect is not None:
+ dr_kwargs['dialect'] = dialect
+
+ reader = csv.DictReader(csv_file, **dr_kwargs)
+
+ for j, row in enumerate(reader):
+ if lines and j >= lines:
+ break
+ line_num = reader.line_num if fieldnames is None else reader.line_num + 1
+ self.show_progress(line_num, total_line, 60)
+ def safe_decode(val, encoding):
+ if val:
+ return val.decode(encoding)
+ else:
+ return val
+
+ row = dict([(safe_decode(key, self.encoding), safe_decode(value, self.encoding)) for key, value in row.items()])
+ self.create_datasheet(row)
+
+ transaction.commit()
+ except Exception:
+ transaction.rollback()
+ raise
+ finally:
+ print('')
+ finally:
+ transaction.leave_transaction_management()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdabo/management/commands/import_tag_popularity.py Wed Jun 22 01:00:47 2011 +0200
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jun 17, 2011
+
+@author: ymh
+
+command to import tag popularity
+
+'''
+
+from django.core.management.base import BaseCommand, CommandError
+from hdabo.models import Tag
+from optparse import make_option
+import csv
+import math
+import sys
+
+
+class Command(BaseCommand):
+ '''
+    Command to import tag popularity from a csv file
+ '''
+ args = '<path_to_csv_file path_to_csv_file ...>'
+    options = '[--lines] [--encoding]'
+ help = """Import of a tag popularity file for hdabo
+Options:
+    --delimiter : csv file delimiter. default to ";"
+ --lines : max number of lines to load (for each file). 0 means all.
+ --encoding : files encoding. default to latin-1"""
+
+ option_list = BaseCommand.option_list + (
+ make_option('--encoding',
+ action='store',
+ type='string',
+ dest='encoding',
+ default="latin-1",
+ help='fix the file encoding. default to latin-1'),
+ make_option('--delimiter',
+ action='store',
+ type='string',
+ dest='delimiter',
+ default=";",
+ help='csv file delimiter'),
+ make_option('--dialect',
+ action='store',
+ type='string',
+ dest='dialect',
+ default="excel",
+ help='csv dialect'),
+ make_option('--fieldnames',
+ action='store',
+ type='string',
+ dest='fieldnames',
+ default="label,popularity",
+ help='fields list (comma separated)'),
+ make_option('--lines',
+ action='store',
+ type='int',
+ dest='lines',
+ default=0,
+ help='Number of lines to read. 0 means all.'),
+
+ )
+
+ def show_progress(self, current_line, total_line, width):
+
+ percent = (float(current_line) / float(total_line)) * 100.0
+
+ marks = math.floor(width * (percent / 100.0))
+ spaces = math.floor(width - marks)
+
+ loader = '[' + ('=' * int(marks)) + (' ' * int(spaces)) + ']'
+
+ sys.stdout.write("%s %d%% %d/%d\r" % (loader, percent, current_line - 1, total_line - 1)) #takes the header into account
+ if percent >= 100:
+ sys.stdout.write("\n")
+ sys.stdout.flush()
+
+ def handle(self, *args, **options):
+
+ if len(args) == 0:
+ raise CommandError("Give one csv file to import")
+ elif len(args) > 1:
+ raise CommandError("Only one file can be imported")
+
+ self.encoding = options.get('encoding', "latin-1")
+ lines = options.get('lines', 0)
+ fieldnames = options.get('fieldnames', "label,popularity")
+
+ csv_path = args[0]
+
+ print("Processing %s " % (csv_path))
+
+ with open(csv_path, 'rU') as csv_file:
+ # get the number of lines if necessary
+ if not lines:
+ for i, l in enumerate(csv_file): #@UnusedVariable
+ pass
+ total_line = i + 1
+ if fieldnames:
+ total_line = total_line + 1
+ csv_file.seek(0)
+ else:
+ total_line = lines + 1
+
+ delimiter = options.get('delimiter', ";")
+ if delimiter == "TAB" or delimiter == "\\t":
+ delimiter = '\t'
+ dr_kwargs = {'delimiter':delimiter}
+ if fieldnames is not None:
+ dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
+ dialect = options.get('dialect', "excel")
+ if dialect is not None:
+ dr_kwargs['dialect'] = dialect
+
+ reader = csv.DictReader(csv_file, **dr_kwargs)
+
+ for j, row in enumerate(reader):
+ if lines and j >= lines:
+ break
+ line_num = reader.line_num if fieldnames is None else reader.line_num + 1
+ self.show_progress(line_num, total_line, 60)
+ def safe_decode(val, encoding):
+ if val:
+ return val.decode(encoding)
+ else:
+ return val
+
+ row = dict([(safe_decode(key, self.encoding), safe_decode(value, self.encoding)) for key, value in row.items()])
+
+ label = row['label'].strip()
+
+ if not label:
+ continue
+
+ for tag in Tag.objects.filter(label__iexact=label):
+ tag.popularity = int(row['popularity'])
+ tag.save()
--- a/web/hdabo/management/commands/importcsv.py Mon Jun 20 15:49:22 2011 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,253 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Created on May 25, 2011
-
-@author: ymh
-'''
-#Auteur,Chemin,Comment,Controle,Datcre,Datmaj,Desc,Domaine,Format,ID,Insee,Org,Org_Home,OrgID,Periode1,Periode2,Periode3,Satut,Sousdom,Tag,Theme2,Theme3,Titre,Url,Vignette,Ville
-#"Auteur","Chemin","Comment","Controle","Datcre","Datmaj","Desc","Domaine","Format","ID","Insee","Org","Org_Home","OrgID","Periode1","Periode2","Periode3","Satut","Sousdom","Tag","Theme2","Theme3","Titre","Url","Vignette","Ville",
-
-from django.core.management.base import BaseCommand, CommandError
-from django.db import transaction
-from hdabo.models import (Author, Datasheet, DocumentFormat, Domain, Organisation,
- Tag, TaggedSheet, TimePeriod, Location)
-from hdabo.wp_utils import normalize_tag
-from optparse import make_option
-import csv
-import datetime
-import math
-import sys
-
-class Command(BaseCommand):
- '''
- Command to import csvfile
- '''
- args = '<path_to_csv_file path_to_csv_file ...>'
- options = '[--ignore-existing] [--lines] [--encoding]'
- help = """Import of a csv file for hdabo
-Options:
- --ignore-existing : ignore existing datasheets
- --lines : max number of lines to load (for each file). 0 means all.
- --encoding : files encoding. default to latin-1"""
-
- option_list = BaseCommand.option_list + (
- make_option('--encoding',
- action='store',
- type='string',
- dest='encoding',
- default="latin-1",
- help='fix the file encoding. default to latin-1'),
- make_option('--delimiter',
- action='store',
- type='string',
- dest='delimiter',
- default=";",
- help='csv file delimiter'),
- make_option('--dialect',
- action='store',
- type='string',
- dest='dialect',
- default="excel",
- help='csv dialect'),
- make_option('--fieldnames',
- action='store',
- type='string',
- dest='fieldnames',
- default=None,
- help='fields list (comma separated)'),
- make_option('--lines',
- action='store',
- type='int',
- dest='lines',
- default=0,
- help='Number of lines to read. 0 means all.'),
- make_option('--ignore-existing',
- action='store_true',
- dest='ignore_existing',
- default=False,
- help='force insertion'),
-
- )
-
- def show_progress(self, current_line, total_line, width):
-
- percent = (float(current_line) / float(total_line)) * 100.0
-
- marks = math.floor(width * (percent / 100.0))
- spaces = math.floor(width - marks)
-
- loader = '[' + ('=' * int(marks)) + (' ' * int(spaces)) + ']'
-
- sys.stdout.write("%s %d%% %d/%d\r" % (loader, percent, current_line - 1, total_line - 1)) #takes the header into account
- if percent >= 100:
- sys.stdout.write("\n")
- sys.stdout.flush()
-
-
- def create_domain_period(self, row_value, klass, school_period):
- res_list = []
- if not row_value:
- return res_list
- for label_str in [dstr.strip() for dstr in row_value.split('\x0b')]:
- if label_str:
- res_obj, created = klass.objects.get_or_create(label=label_str, school_period=school_period, defaults={"label":label_str, "school_period":school_period}) #@UnusedVariable
- res_list.append(res_obj)
- return res_list
-
- def create_datasheet(self, row):
-
- if self.ignore_existing and Datasheet.objects.filter(hda_id=row[u"ID"]).count() > 0:
- return
-
- author_str = row[u'Auteur']
- if author_str:
- author_array = author_str.split(" ")
- if len(author_array) == 0:
- firstname = ""
- lastname = ""
- elif len(author_array) == 1:
- firstname = ""
- lastname = author_array[0]
- elif len(author_array) == 2:
- firstname = author_array[0]
- lastname = author_array[1]
-
- author, created = Author.objects.get_or_create(hda_id=author_str, defaults={"firstname":firstname, "lastname":lastname}) #@UnusedVariable
- else:
- author = None
-
- org_str = row[u"Org"]
- if org_str:
- url_str = row[u'Org_Home']
- if url_str is not None:
- url_str = url_str.strip()
- org, created = Organisation.objects.get_or_create(hda_id=org_str, defaults={"name":org_str, "website" : url_str}) #@UnusedVariable
- else:
- org = None
-
- town_str = row[u"Ville"]
- if town_str:
- insee_str = row[u'Insee'].strip() if row[u'Insee'] else row[u'Insee']
- if len(insee_str) > 5:
- insee_str = ""
- loc, created = Location.objects.get_or_create(insee=insee_str, defaults={"name": town_str, "insee": insee_str}) #@UnusedVariable
- else:
- loc = None
-
- format_str = row[u"Format"]
- if format_str:
- format, created = DocumentFormat.objects.get_or_create(label=format_str, defaults={"label": format_str}) #@UnusedVariable
- else:
- format = None
-
- domains = self.create_domain_period(row[u"Domaine"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Global'])
-
- primary_periods = self.create_domain_period(row[u"Periode1"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Primaire'])
- college_periods = self.create_domain_period(row[u"Periode2"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Collège'])
- highschool_periods = self.create_domain_period(row[u"Periode3"], TimePeriod, TimePeriod.TIME_PERIOD_DICT[u'Lycée'])
-
- primary_themes = self.create_domain_period(row[u"Sousdom"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Primaire'])
- college_themes = self.create_domain_period(row[u"Theme2"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Collège'])
- highschool_themes = self.create_domain_period(row[u"Theme3"], Domain, Domain.DOMAIN_PERIOD_DICT[u'Lycée'])
-
- url = row[u"Url"]
- if url is not None:
- url = url.strip()
-
- datasheet = Datasheet.objects.create(
- hda_id=row[u"ID"],
- author=author,
- organisation=org,
- title=row[u"Titre"],
- description=row[u"Desc"],
- url=url,
- town=loc,
- format=format,
- original_creation_date=datetime.datetime.strptime(row[u"Datcre"], "%d/%m/%Y").date(),
- original_modification_date=datetime.datetime.strptime(row[u"Datmaj"], "%d/%m/%Y").date(),
- validated=False
- )
-
- datasheet.save()
-
- datasheet.domains = domains
- datasheet.primary_periods = primary_periods
- datasheet.college_periods = college_periods
- datasheet.highschool_periods = highschool_periods
- datasheet.primary_themes = primary_themes
- datasheet.college_themes = college_themes
- datasheet.highschool_themes = highschool_themes
-
-
- if row[u'Tag']:
- for i, tag in enumerate([t.strip() for t in row[u'Tag'].split(u";")]):
- if len(tag) == 0:
- continue
- tag_label = normalize_tag(tag)
- tag_obj, created = Tag.objects.get_or_create(label__iexact=tag_label, defaults={'label':tag_label, 'original_label':tag}) #@UnusedVariable
- tagged_ds = TaggedSheet(datasheet=datasheet, tag=tag_obj, original_order=i + 1, order=i + 1)
- tagged_ds.save()
-
-
- def handle(self, *args, **options):
-
- if len(args) == 0:
- raise CommandError("Gives at lat one csv file to import")
-
- self.encoding = options.get('encoding', "latin-1")
- lines = options.get('lines', 0)
- self.ignore_existing = options.get('ignore_existing', False)
- fieldnames = options.get('fieldnames', None)
-
- transaction.commit_unless_managed()
- transaction.enter_transaction_management()
- transaction.managed(True)
-
- try:
- for csv_path in args:
- try:
- print "Processing %s " % (csv_path)
- with open(csv_path, 'rU') as csv_file:
-
- # get the number of lines if necessary
- if not lines:
- for i, l in enumerate(csv_file): #@UnusedVariable
- pass
- total_line = i + 1
- if fieldnames:
- total_line = total_line + 1
- csv_file.seek(0)
- else:
- total_line = lines + 1
-
- dr_kwargs = {'delimiter':options.get('delimiter', ";")}
- if fieldnames is not None:
- dr_kwargs['fieldnames'] = [f.strip() for f in fieldnames.split(",")]
- dialect = options.get('dialect', "excel")
- if dialect is not None:
- dr_kwargs['dialect'] = dialect
-
- reader = csv.DictReader(csv_file, **dr_kwargs)
-
- for j, row in enumerate(reader):
- if lines and j >= lines:
- break
- line_num = reader.line_num if fieldnames is None else reader.line_num + 1
- self.show_progress(line_num, total_line, 60)
- def safe_decode(val, encoding):
- if val:
- return val.decode(encoding)
- else:
- return val
-
- row = dict([(safe_decode(key, self.encoding), safe_decode(value, self.encoding)) for key, value in row.items()])
- self.create_datasheet(row)
-
- transaction.commit()
- except Exception:
- transaction.rollback()
- raise
- finally:
- print('')
- finally:
- transaction.leave_transaction_management()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdabo/management/commands/order_tags.py Wed Jun 22 01:00:47 2011 +0200
@@ -0,0 +1,105 @@
+'''
+Created on Jun 7, 2011
+
+@author: ymh
+'''
+
+from django.core.management.base import NoArgsCommand
+from django.core.management.color import no_style
+from haystack.constants import DJANGO_ID
+from haystack.query import SearchQuerySet
+from hdabo.models import Datasheet
+import math
+import sys
+from optparse import make_option
+from django.db import transaction
+
+
+class Command(NoArgsCommand):
+ '''
+ Command to calculate the order of tags based on indexation
+ recalculate all tags. Will ask for confirmation
+ '''
+
+ args = ''
+ options = '-f : force '
+ help = "calculate the order of tags based on indexation recalculate all tags. Will ask for confirmation"
+
+ option_list = NoArgsCommand.option_list + (
+ make_option('-f', '--force',
+ action='store_true',
+ dest='force',
+ default=False,
+ help='force reordering of all datasheets'),
+ )
+
+
+ def show_progress(self, current_line, total_line, width):
+
+ percent = (float(current_line) / float(total_line)) * 100.0
+
+ marks = math.floor(width * (percent / 100.0))
+ spaces = math.floor(width - marks)
+
+ loader = u'[' + (u'=' * int(marks)) + (u' ' * int(spaces)) + u']'
+
+ sys.stdout.write(u"%s %d%% %d/%d\r" % (loader, percent, current_line, total_line)) #takes the header into account
+ if percent >= 100:
+ sys.stdout.write("\n")
+ sys.stdout.flush()
+
+
+ def handle_noargs(self, **options):
+
+ self.style = no_style()
+
+ interactive = options.get('interactive', True)
+ force = options.get('force', True)
+
+ if interactive:
+ confirm = raw_input("""You have requested to recalculate the index order of all the tags.
+This will process all the tags in %s datasheets. Are you sure you want to do this ?
+ Type 'yes' to continue, or 'no' to cancel: """ % ("all" if force else "not validated"))
+ else:
+ confirm = 'yes'
+
+ if confirm != "yes":
+ print "Tag reordering cancelled"
+ return
+
+ if force:
+ queryset = Datasheet.objects.all()
+ else:
+ queryset = Datasheet.objects.filter(validated=False, manual_order=False)
+ total = queryset.count()
+
+ transaction.commit_unless_managed()
+ transaction.enter_transaction_management()
+ transaction.managed(True)
+
+ try:
+ for i, ds in enumerate(queryset):
+ self.show_progress(i + 1, total, 60)
+ ts_list = []
+ for ts in ds.taggedsheet_set.all():
+ kwargs = {DJANGO_ID + "__exact": unicode(ds.pk)}
+ results = SearchQuerySet().filter(title=ts.tag.label).filter_or(description=ts.tag.label).filter(**kwargs)
+ if len(results) > 0:
+ ts.index_note = results[0].score
+ ts.save()
+ ts_list.append(ts)
+ ts_list.sort(key=lambda t: (-t.index_note, t.order))
+ for i, ts in enumerate(ts_list):
+ ts.order = i + 1
+ ts.save()
+ if ds.manual_order:
+ ds.manual_order = False
+ ds.save()
+ transaction.commit()
+ except:
+ transaction.rollback()
+ raise
+ finally:
+ transaction.leave_transaction_management()
+
+
--- a/web/hdabo/management/commands/ordertags.py Mon Jun 20 15:49:22 2011 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,74 +0,0 @@
-'''
-Created on Jun 7, 2011
-
-@author: ymh
-'''
-
-from django.core.management.base import NoArgsCommand
-from django.core.management.color import no_style
-from haystack.constants import DJANGO_ID
-from haystack.query import SearchQuerySet
-from hdabo.models import Datasheet
-import math
-import sys
-
-
-class Command(NoArgsCommand):
- '''
- Command to calculate the order of tags based on indexation
- recalculate all tags. Will ask for confirmation
- '''
-
- args = ''
- options = ''
- help = "calculate the order of tags based on indexation recalculate all tags. Will ask for confirmation"
-
- def show_progress(self, current_line, total_line, width):
-
- percent = (float(current_line) / float(total_line)) * 100.0
-
- marks = math.floor(width * (percent / 100.0))
- spaces = math.floor(width - marks)
-
- loader = u'[' + (u'=' * int(marks)) + (u' ' * int(spaces)) + u']'
-
- sys.stdout.write(u"%s %d%% %d/%d\r" % (loader, percent, current_line, total_line)) #takes the header into account
- if percent >= 100:
- sys.stdout.write("\n")
- sys.stdout.flush()
-
-
- def handle_noargs(self, **options):
-
- self.style = no_style()
-
- interactive = options.get('interactive', True)
-
- if interactive:
- confirm = raw_input("""You have requested to recalculate the index order of all the tags.
-This will process all the tags in all datasheets. Are you sure you want to do this ?
- Type 'yes' to continue, or 'no' to cancel: """)
- else:
- confirm = 'yes'
-
- if confirm != "yes":
- print "Tag reordering cancelled"
- return
-
- total = Datasheet.objects.all().count()
-
- for i, ds in enumerate(Datasheet.objects.all()):
- self.show_progress(i + 1, total, 60)
- ts_list = []
- for ts in ds.taggedsheet_set.all():
- kwargs = {DJANGO_ID + "__exact": unicode(ds.pk)}
- results = SearchQuerySet().filter(title=ts.tag.label).filter_or(description=ts.tag.label).filter(**kwargs)
- if len(results) > 0:
- ts.index_note = results[0].score
- ts.save()
- ts_list.append(ts)
- ts_list.sort(key=lambda t: (-t.index_note, t.order))
- for i, ts in enumerate(ts_list):
- ts.order = i + 1
- ts.save()
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdabo/management/commands/query_wikipedia.py Wed Jun 22 01:00:47 2011 +0200
@@ -0,0 +1,174 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jun 7, 2011
+
+@author: ymh
+'''
+
+from django.conf import settings
+from django.core.management.base import NoArgsCommand
+from django.core.management.color import no_style
+from hdabo.models import Tag
+from hdabo.wp_utils import process_tag
+from optparse import make_option
+from wikitools import wiki
+import math
+import sys
+
+
+
+class Command(NoArgsCommand):
+ '''
+ query and update wikipedia for tag title.
+ '''
+ options = ''
+ help = """query and update wikipedia for tag title."""
+
+ option_list = NoArgsCommand.option_list + (
+ make_option('--force',
+ action='store_true',
+ dest='force',
+ default=False,
+ help='force all tags to be updated, not only those not yet processed'),
+ make_option('--random',
+ action='store_true',
+ dest='random',
+ default=False,
+ help='randomize query on tags'),
+ make_option('--site',
+ action='store',
+ type='string',
+ dest='site_url',
+ default="http://fr.wikipedia.org/w/api.php",
+ help='the url for the wikipedia site'),
+ make_option('--limit',
+ action='store',
+ type='int',
+ dest='limit',
+ default= -1,
+ help='number of tag to process'),
+ make_option('--start',
+ action='store',
+ type='int',
+ dest='start',
+ default=0,
+ help='number of tag to ignore'),
+ )
+
+ def __is_homonymie(self, page_dict):
+ for cat in page_dict.get(u"categories", []):
+ if u'Catégorie:Homonymie' in cat.get(u"title", u"") or u'Category:Disambiguation pages' in cat.get(u"title", u""):
+ return True
+ return False
+
+
+ def process_wp_response(self, label, response):
+
+
+ query_dict = response['query']
+ # get page if multiple pages or none -> return Tag.null_result
+ pages = query_dict.get("pages", {})
+ if len(pages) > 1 or len(pages) == 0:
+ return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
+
+ page = pages.values()[0]
+
+ if u"invalid" in page or u"missing" in page:
+ return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
+
+ url = page.get(u'fullurl', None)
+ pageid = page.get(u'pageid', None)
+ new_label = page[u'title']
+
+ if self.__is_homonymie(page):
+ status = Tag.TAG_URL_STATUS_DICT["homonyme"]
+ elif u"redirect" in page:
+ status = Tag.TAG_URL_STATUS_DICT["redirection"]
+ else:
+ status = Tag.TAG_URL_STATUS_DICT["match"]
+
+ return new_label, status, url, pageid
+
+ def show_progress(self, current_line, total_line, label, width):
+
+ percent = (float(current_line) / float(total_line)) * 100.0
+
+ marks = math.floor(width * (percent / 100.0))
+ spaces = math.floor(width - marks)
+
+ loader = u'[' + (u'=' * int(marks)) + (u' ' * int(spaces)) + u']'
+
+ sys.stdout.write(u"%s %d%% %d/%d - %s\r" % (loader, percent, current_line - 1, total_line - 1, repr(label))) #takes the header into account
+ if percent >= 100:
+ sys.stdout.write("\n")
+ sys.stdout.flush()
+
+ def handle_noargs(self, **options):
+
+ self.style = no_style()
+
+ interactive = options.get('interactive', True)
+
+ verbosity = int(options.get('verbosity', '1'))
+
+ force = options.get('force', False)
+
+ limit = options.get("limit", -1)
+ start = options.get("start", 0)
+
+ site_url = options.get('site_url', settings.WIKIPEDIA_API_URL)
+
+ random = options.get('random', False)
+
+ if verbosity > 2:
+ print "option passed : " + repr(options)
+
+ if force and interactive:
+ confirm = raw_input("""You have requested to query and replace the wikipedia information for all datasheets.
+Are you sure you want to do this ?
+ Type 'yes' to continue, or 'no' to cancel: """)
+ else:
+ confirm = 'yes'
+
+ if confirm != "yes":
+ print "wikipedia query cancelled"
+ return
+
+ if force:
+ queryset = Tag.objects.all()
+ else:
+ queryset = Tag.objects.filter(url_status=None)
+
+ if random:
+ queryset = queryset.order_by("?")
+ else:
+ queryset = queryset.order_by("label")
+
+ if limit >= 0:
+ queryset = queryset[start:limit]
+ else:
+ queryset = queryset[start:]
+
+
+ if verbosity > 2 :
+ print "Tag Query is %s" % (queryset.query)
+
+ site = wiki.Wiki(site_url) #@UndefinedVariable
+
+
+ count = queryset.count()
+ if verbosity > 1:
+ print "Processing %d tags" % (count)
+
+
+
+ for i, tag in enumerate(queryset):
+
+ if verbosity > 1:
+ print "processing tag %s (%d/%d)" % (tag.label, i + 1, count)
+ else:
+ self.show_progress(i + 1, count, tag.label, 60)
+
+ process_tag(site, tag, verbosity)
+
+
--- a/web/hdabo/management/commands/querywikipedia.py Mon Jun 20 15:49:22 2011 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,193 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Created on Jun 7, 2011
-
-@author: ymh
-'''
-
-from django.conf import settings
-from django.core.management.base import NoArgsCommand
-from django.core.management.color import no_style
-from hdabo.models import Tag
-from hdabo.wp_utils import query_wikipedia_title
-from optparse import make_option
-from wikitools import wiki
-import math
-import sys
-
-
-def process_tag(site, tag, verbosity):
- new_label, status, url, pageid, response = query_wikipedia_title(site, tag.label)
-
- if verbosity >= 2 :
- print "response from query to %s with parameters %s :" % (site.apibase, repr(new_label))
- print repr(response)
-
- if new_label is not None:
- tag.label = new_label
- if status is not None:
- tag.url_status = status
- if url is not None:
- tag.wikipedia_url = url
- if pageid is not None:
- tag.wikipedia_pageid = pageid
-
- tag.save()
-
-
-
-class Command(NoArgsCommand):
- '''
- query and update wikipedia for tag title.
- '''
- options = ''
- help = """query and update wikipedia for tag title."""
-
- option_list = NoArgsCommand.option_list + (
- make_option('--force',
- action='store_true',
- dest='force',
- default=False,
- help='force all tags to be updated, not only those not yet processed'),
- make_option('--random',
- action='store_true',
- dest='random',
- default=False,
- help='randomize query on tags'),
- make_option('--site',
- action='store',
- type='string',
- dest='site_url',
- default="http://fr.wikipedia.org/w/api.php",
- help='the url for the wikipedia site'),
- make_option('--limit',
- action='store',
- type='int',
- dest='limit',
- default= -1,
- help='number of tag to process'),
- make_option('--start',
- action='store',
- type='int',
- dest='start',
- default=0,
- help='number of tag to ignore'),
- )
-
- def __is_homonymie(self, page_dict):
- for cat in page_dict.get(u"categories", []):
- if u'Catégorie:Homonymie' in cat.get(u"title", u"") or u'Category:Disambiguation pages' in cat.get(u"title", u""):
- return True
- return False
-
-
- def process_wp_response(self, label, response):
-
-
- query_dict = response['query']
- # get page if multiple pages or none -> return Tag.null_result
- pages = query_dict.get("pages", {})
- if len(pages) > 1 or len(pages) == 0:
- return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
-
- page = pages.values()[0]
-
- if u"invalid" in page or u"missing" in page:
- return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None
-
- url = page.get(u'fullurl', None)
- pageid = page.get(u'pageid', None)
- new_label = page[u'title']
-
- if self.__is_homonymie(page):
- status = Tag.TAG_URL_STATUS_DICT["homonyme"]
- elif u"redirect" in page:
- status = Tag.TAG_URL_STATUS_DICT["redirection"]
- else:
- status = Tag.TAG_URL_STATUS_DICT["match"]
-
- return new_label, status, url, pageid
-
- def show_progress(self, current_line, total_line, label, width):
-
- percent = (float(current_line) / float(total_line)) * 100.0
-
- marks = math.floor(width * (percent / 100.0))
- spaces = math.floor(width - marks)
-
- loader = u'[' + (u'=' * int(marks)) + (u' ' * int(spaces)) + u']'
-
- sys.stdout.write(u"%s %d%% %d/%d - %s\r" % (loader, percent, current_line - 1, total_line - 1, repr(label))) #takes the header into account
- if percent >= 100:
- sys.stdout.write("\n")
- sys.stdout.flush()
-
- def handle_noargs(self, **options):
-
- self.style = no_style()
-
- interactive = options.get('interactive', True)
-
- verbosity = int(options.get('verbosity', '1'))
-
- force = options.get('force', False)
-
- limit = options.get("limit", -1)
- start = options.get("start", 0)
-
- site_url = options.get('site_url', settings.WIKIPEDIA_API_URL)
-
- random = options.get('random', False)
-
- if verbosity > 2:
- print "option passed : " + repr(options)
-
- if force and interactive:
- confirm = raw_input("""You have requested to query and replace the wikipedia information for all datasheets.
-Are you sure you want to do this ?
- Type 'yes' to continue, or 'no' to cancel: """)
- else:
- confirm = 'yes'
-
- if confirm != "yes":
- print "wikipedia query cancelled"
- return
-
- if force:
- queryset = Tag.objects.all()
- else:
- queryset = Tag.objects.filter(url_status=None)
-
- if random:
- queryset = queryset.order_by("?")
- else:
- queryset = queryset.order_by("label")
-
- if limit >= 0:
- queryset = queryset[start:limit]
- else:
- queryset = queryset[start:]
-
-
- if verbosity > 2 :
- print "Tag Query is %s" % (queryset.query)
-
- site = wiki.Wiki(site_url) #@UndefinedVariable
-
-
- count = queryset.count()
- if verbosity > 1:
- print "Processing %d tags" % (count)
-
-
-
- for i, tag in enumerate(queryset):
-
- if verbosity > 1:
- print "processing tag %s (%d/%d)" % (tag.label, i + 1, count)
- else:
- self.show_progress(i + 1, count, tag.label, 60)
-
- process_tag(site, tag, verbosity)
-
-
--- a/web/hdabo/models.py Mon Jun 20 15:49:22 2011 +0200
+++ b/web/hdabo/models.py Wed Jun 22 01:00:47 2011 +0200
@@ -1,278 +1,280 @@
-# -*- coding: utf-8 -*-
-
-from django.contrib.auth.models import User
-from django.db import models
-from hdabo.fields import SortedManyToManyField
-from hdabo.utils import Property
-import datetime
-
-class Organisation(models.Model):
- hda_id = models.CharField(max_length=512, unique=True, blank=False, null=False)
- name = models.CharField(max_length=512, unique=False, blank=False, null=False)
- location = models.CharField(max_length=512, unique=False, blank=True, null=True)
- website = models.CharField(max_length=2048, unique=False, blank=True, null=True)
-
-
-class Author(models.Model):
- hda_id = models.CharField(max_length=512, unique=True, blank=False, null=False)
- lastname = models.CharField(max_length=512, unique=False, blank=True, null=True)
- firstname = models.CharField(max_length=512, unique=False, blank=True, null=True)
-
-class TimePeriod(models.Model):
- TIME_PERIOD_CHOICES = (
- (1, u'Primaire'),
- (2, u'Collège'),
- (3, u'Lycée'),
- )
- TIME_PERIOD_DICT = {
- u'Primaire': 1,
- u'Collège': 2,
- u'Lycée': 3,
- }
- label = models.CharField(max_length=512, unique=False, blank=False, null=False)
- school_period = models.IntegerField(choices=TIME_PERIOD_CHOICES)
-
- class Meta:
- unique_together = ("label", "school_period")
-
- def __unicode__(self):
- return unicode(self.label)
-
-
-class Domain(models.Model):
- DOMAIN_PERIOD_CHOICES = (
- (0, u'Global'),
- (1, u'Primaire'),
- (2, u'Collège'),
- (3, u'Lycée'),
- )
- DOMAIN_PERIOD_DICT = {
- u'Global': 0,
- u'Primaire': 1,
- u'Collège': 2,
- u'Lycée': 3,
- }
- label = models.CharField(max_length=512, unique=False, blank=False, null=False)
- school_period = models.IntegerField(choices=DOMAIN_PERIOD_CHOICES)
-
- class Meta:
- unique_together = ("label", "school_period")
-
- def __unicode__(self):
- return unicode(self.label)
-
-
-class DocumentFormat(models.Model):
- label = models.CharField(max_length=512, unique=True, blank=False, null=False)
-
- def __unicode__(self):
- return unicode(self.label)
-
-class TagCategory(models.Model):
- label = models.CharField(max_length=512, unique=True, blank=False, null=False)
-
- def __unicode__(self):
- return unicode(self.label)
-
- class Meta:
- verbose_name_plural = "TagCategories"
-
-class Tag(models.Model):
- TAG_URL_STATUS_CHOICES = (
- (0, "null_result"),
- (1, "redirection"),
- (2, "homonyme"),
- (3, "match"),
- )
-
- TAG_URL_STATUS_DICT = {
- "null_result":0,
- "redirection":1,
- "homonyme":2,
- "match":3,
- }
-
- label = models.CharField(max_length=1024, unique=False, blank=False, null=False)
- original_label = models.CharField(max_length=1024, unique=True, blank=False, null=False, editable=False)
- alias = models.CharField(max_length=1024, unique=False, blank=True, null=True)
- category = models.ForeignKey(TagCategory, null=True, blank=True)
- wikipedia_url = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True)
- wikipedia_pageid = models.BigIntegerField(unique=False, blank=True, null=True)
- url_status = models.IntegerField(choices=TAG_URL_STATUS_CHOICES, blank=True, null=True, default=None)
- dbpedia_uri = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True)
-
- @Property
- def url_status_text(): #@NoSelf
- def fget(self):
- return self.TAG_URL_STATUS_CHOICES[self.url_status][1]
-
- return locals()
-
-class Location(models.Model):
- name = models.CharField(max_length=512, unique=False, blank=False, null=False)
- insee = models.CharField(max_length=5, unique=True, blank=False, null=False)
-
- def __unicode__(self):
- return unicode("%s : %s" % (self.name, self.insee))
-
-class Datasheet(models.Model):
- hda_id = models.CharField(max_length=512, unique=True, blank=False, null=False)
- author = models.ForeignKey(Author, null=True, blank=True)
- organisation = models.ForeignKey(Organisation)
- title = models.CharField(max_length=2048, unique=False, blank=False, null=False)
- description = models.TextField(blank=True, null=True)
- url = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True)
- domains = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Global']}, related_name="datasheets")
- primary_periods = SortedManyToManyField(TimePeriod, limit_choices_to={'school_period':TimePeriod.TIME_PERIOD_DICT[u'Primaire']}, related_name="primary_periods_datasheets")
- college_periods = SortedManyToManyField(TimePeriod, limit_choices_to={'school_period':TimePeriod.TIME_PERIOD_DICT[u'Collège']}, related_name="college_periods_datasheets")
- highschool_periods = SortedManyToManyField(TimePeriod, limit_choices_to={'school_period':TimePeriod.TIME_PERIOD_DICT[u'Lycée']}, related_name="highschool_periods_datasheets")
- primary_themes = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Primaire']}, related_name="primary_themes_datasheets")
- college_themes = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Collège']}, related_name="college_themes_datasheets")
- highschool_themes = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Lycée']}, related_name="highschool_themes_datasheets")
- town = models.ForeignKey(Location, null=True, blank=True)
- format = models.ForeignKey(DocumentFormat, null=True, blank=True)
- original_creation_date = models.DateField()
- original_modification_date = models.DateField()
- modification_datetime = models.DateTimeField(auto_now=True)
- validation_date = models.DateTimeField(null=True, blank=True)
- validated = models.BooleanField(default=False)
- validator = models.ForeignKey(User, null=True, blank=True)
- tags = models.ManyToManyField(Tag, through='TaggedSheet')
-
-
- def validate(self, user):
- self.validation_date = datetime.datetime.now()
- self.validated = True
- self.validator = user
- self.save()
-
- def unvalidate(self):
- self.validation_date = datetime.datetime.min
- self.validated = False
- self.validator = None
- self.save()
-
- @Property
- def domains_list(): #@NoSelf
- def fget(self):
- return [d.label for d in self.domains.all()]
-
- return locals()
-
- @Property
- def domains_text(): #@NoSelf
- def fget(self):
- return "; ".join(self.domains_list)
-
- return locals()
-
- @Property
- def primary_periods_list(): #@NoSelf
- def fget(self):
- return [d.label for d in self.primary_periods.all()]
-
- return locals()
-
-
- @Property
- def primary_periods_text(): #@NoSelf
- def fget(self):
- return "; ".join(self.primary_periods_list)
-
- return locals()
-
- @Property
- def college_periods_list(): #@NoSelf
- def fget(self):
- return [d.label for d in self.college_periods.all()]
-
- return locals()
-
- @Property
- def college_periods_text(): #@NoSelf
- def fget(self):
- return "; ".join(self.college_periods_list)
-
- return locals()
-
- @Property
- def highschool_periods_list(): #@NoSelf
- def fget(self):
- return [d.label for d in self.highschool_periods.all()]
-
- return locals()
-
- @Property
- def highschool_periods_text(): #@NoSelf
- def fget(self):
- return "; ".join(self.highschool_periods_list)
-
- return locals()
-
-
- @Property
- def primary_themes_list(): #@NoSelf
- def fget(self):
- return [d.label for d in self.primary_themes.all()]
-
- return locals()
-
-
- @Property
- def primary_themes_text(): #@NoSelf
- def fget(self):
- return "; ".join(self.primary_themes_list)
-
- return locals()
-
- @Property
- def college_themes_list(): #@NoSelf
- def fget(self):
- return [d.label for d in self.college_themes.all()]
-
- return locals()
-
- @Property
- def college_themes_text(): #@NoSelf
- def fget(self):
- return "; ".join(self.college_themes_list)
-
- return locals()
-
- @Property
- def highschool_themes_list(): #@NoSelf
- def fget(self):
- return [d.label for d in self.highschool_themes.all()]
-
- return locals()
-
- @Property
- def highschool_themes_text(): #@NoSelf
- def fget(self):
- return "; ".join(self.highschool_themes_list)
-
- return locals()
-
- @Property
- def town_text(): #@NoSelf
- def fget(self):
- return self.town.name if self.town else ""
-
- return locals()
-
- @Property
- def tags_text(): #@NoSelf
- def fget(self):
- return "; ".join([t.label for t in self.tags.all()])
-
- return locals()
-
-
-class TaggedSheet(models.Model):
- datasheet = models.ForeignKey(Datasheet)
- tag = models.ForeignKey(Tag)
- original_order = models.IntegerField(default=0)
- order = models.IntegerField(default=0)
- index_note = models.FloatField(default=0.0)
-
-
+# -*- coding: utf-8 -*-
+
+from django.contrib.auth.models import User
+from django.db import models
+from hdabo.fields import SortedManyToManyField
+from hdabo.utils import Property
+import datetime
+
+class Organisation(models.Model):
+ hda_id = models.CharField(max_length=512, unique=True, blank=False, null=False)
+ name = models.CharField(max_length=512, unique=False, blank=False, null=False)
+ location = models.CharField(max_length=512, unique=False, blank=True, null=True)
+ website = models.CharField(max_length=2048, unique=False, blank=True, null=True)
+
+
+class Author(models.Model):
+ hda_id = models.CharField(max_length=512, unique=True, blank=False, null=False)
+ lastname = models.CharField(max_length=512, unique=False, blank=True, null=True)
+ firstname = models.CharField(max_length=512, unique=False, blank=True, null=True)
+
+class TimePeriod(models.Model):
+ TIME_PERIOD_CHOICES = (
+ (1, u'Primaire'),
+ (2, u'Collège'),
+ (3, u'Lycée'),
+ )
+ TIME_PERIOD_DICT = {
+ u'Primaire': 1,
+ u'Collège': 2,
+ u'Lycée': 3,
+ }
+ label = models.CharField(max_length=512, unique=False, blank=False, null=False)
+ school_period = models.IntegerField(choices=TIME_PERIOD_CHOICES)
+
+ class Meta:
+ unique_together = ("label", "school_period")
+
+ def __unicode__(self):
+ return unicode(self.label)
+
+
+class Domain(models.Model):
+ DOMAIN_PERIOD_CHOICES = (
+ (0, u'Global'),
+ (1, u'Primaire'),
+ (2, u'Collège'),
+ (3, u'Lycée'),
+ )
+ DOMAIN_PERIOD_DICT = {
+ u'Global': 0,
+ u'Primaire': 1,
+ u'Collège': 2,
+ u'Lycée': 3,
+ }
+ label = models.CharField(max_length=512, unique=False, blank=False, null=False)
+ school_period = models.IntegerField(choices=DOMAIN_PERIOD_CHOICES)
+
+ class Meta:
+ unique_together = ("label", "school_period")
+
+ def __unicode__(self):
+ return unicode(self.label)
+
+
+class DocumentFormat(models.Model):
+ label = models.CharField(max_length=512, unique=True, blank=False, null=False)
+
+ def __unicode__(self):
+ return unicode(self.label)
+
+class TagCategory(models.Model):
+ label = models.CharField(max_length=512, unique=True, blank=False, null=False)
+
+ def __unicode__(self):
+ return unicode(self.label)
+
+ class Meta:
+ verbose_name_plural = "TagCategories"
+
+class Tag(models.Model):
+ TAG_URL_STATUS_CHOICES = (
+ (0, "null_result"),
+ (1, "redirection"),
+ (2, "homonyme"),
+ (3, "match"),
+ )
+
+ TAG_URL_STATUS_DICT = {
+ "null_result":0,
+ "redirection":1,
+ "homonyme":2,
+ "match":3,
+ }
+
+ label = models.CharField(max_length=1024, unique=False, blank=False, null=False, db_index=True)
+ original_label = models.CharField(max_length=1024, unique=True, blank=False, null=False, editable=False)
+ alias = models.CharField(max_length=1024, unique=False, blank=True, null=True)
+ category = models.ForeignKey(TagCategory, null=True, blank=True)
+ wikipedia_url = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True)
+ wikipedia_pageid = models.BigIntegerField(unique=False, blank=True, null=True)
+ url_status = models.IntegerField(choices=TAG_URL_STATUS_CHOICES, blank=True, null=True, default=None)
+ dbpedia_uri = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True)
+ popularity = models.IntegerField(blank=False, null=False, default=0, db_index=True)
+
+ @Property
+ def url_status_text(): #@NoSelf
+ def fget(self):
+ return self.TAG_URL_STATUS_CHOICES[self.url_status][1]
+
+ return locals()
+
+class Location(models.Model):
+ name = models.CharField(max_length=512, unique=False, blank=False, null=False)
+ insee = models.CharField(max_length=5, unique=True, blank=False, null=False)
+
+ def __unicode__(self):
+ return unicode("%s : %s" % (self.name, self.insee))
+
+class Datasheet(models.Model):
+ hda_id = models.CharField(max_length=512, unique=True, blank=False, null=False)
+ author = models.ForeignKey(Author, null=True, blank=True)
+ organisation = models.ForeignKey(Organisation)
+ title = models.CharField(max_length=2048, unique=False, blank=False, null=False)
+ description = models.TextField(blank=True, null=True)
+ url = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True)
+ domains = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Global']}, related_name="datasheets")
+ primary_periods = SortedManyToManyField(TimePeriod, limit_choices_to={'school_period':TimePeriod.TIME_PERIOD_DICT[u'Primaire']}, related_name="primary_periods_datasheets")
+ college_periods = SortedManyToManyField(TimePeriod, limit_choices_to={'school_period':TimePeriod.TIME_PERIOD_DICT[u'Collège']}, related_name="college_periods_datasheets")
+ highschool_periods = SortedManyToManyField(TimePeriod, limit_choices_to={'school_period':TimePeriod.TIME_PERIOD_DICT[u'Lycée']}, related_name="highschool_periods_datasheets")
+ primary_themes = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Primaire']}, related_name="primary_themes_datasheets")
+ college_themes = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Collège']}, related_name="college_themes_datasheets")
+ highschool_themes = SortedManyToManyField(Domain, limit_choices_to={'school_period':Domain.DOMAIN_PERIOD_DICT[u'Lycée']}, related_name="highschool_themes_datasheets")
+ town = models.ForeignKey(Location, null=True, blank=True)
+ format = models.ForeignKey(DocumentFormat, null=True, blank=True)
+ original_creation_date = models.DateField()
+ original_modification_date = models.DateField()
+ modification_datetime = models.DateTimeField(auto_now=True)
+ validation_date = models.DateTimeField(null=True, blank=True)
+ validated = models.BooleanField(default=False, db_index=True)
+ validator = models.ForeignKey(User, null=True, blank=True)
+ manual_order = models.BooleanField(default=False, db_index=True)
+ tags = models.ManyToManyField(Tag, through='TaggedSheet')
+
+
+ def validate(self, user):
+ self.validation_date = datetime.datetime.now()
+ self.validated = True
+ self.validator = user
+ self.save()
+
+ def unvalidate(self):
+ self.validation_date = datetime.datetime.min
+ self.validated = False
+ self.validator = None
+ self.save()
+
+ @Property
+ def domains_list(): #@NoSelf
+ def fget(self):
+ return [d.label for d in self.domains.all()]
+
+ return locals()
+
+ @Property
+ def domains_text(): #@NoSelf
+ def fget(self):
+ return "; ".join(self.domains_list)
+
+ return locals()
+
+ @Property
+ def primary_periods_list(): #@NoSelf
+ def fget(self):
+ return [d.label for d in self.primary_periods.all()]
+
+ return locals()
+
+
+ @Property
+ def primary_periods_text(): #@NoSelf
+ def fget(self):
+ return "; ".join(self.primary_periods_list)
+
+ return locals()
+
+ @Property
+ def college_periods_list(): #@NoSelf
+ def fget(self):
+ return [d.label for d in self.college_periods.all()]
+
+ return locals()
+
+ @Property
+ def college_periods_text(): #@NoSelf
+ def fget(self):
+ return "; ".join(self.college_periods_list)
+
+ return locals()
+
+ @Property
+ def highschool_periods_list(): #@NoSelf
+ def fget(self):
+ return [d.label for d in self.highschool_periods.all()]
+
+ return locals()
+
+ @Property
+ def highschool_periods_text(): #@NoSelf
+ def fget(self):
+ return "; ".join(self.highschool_periods_list)
+
+ return locals()
+
+
+ @Property
+ def primary_themes_list(): #@NoSelf
+ def fget(self):
+ return [d.label for d in self.primary_themes.all()]
+
+ return locals()
+
+
+ @Property
+ def primary_themes_text(): #@NoSelf
+ def fget(self):
+ return "; ".join(self.primary_themes_list)
+
+ return locals()
+
+ @Property
+ def college_themes_list(): #@NoSelf
+ def fget(self):
+ return [d.label for d in self.college_themes.all()]
+
+ return locals()
+
+ @Property
+ def college_themes_text(): #@NoSelf
+ def fget(self):
+ return "; ".join(self.college_themes_list)
+
+ return locals()
+
+ @Property
+ def highschool_themes_list(): #@NoSelf
+ def fget(self):
+ return [d.label for d in self.highschool_themes.all()]
+
+ return locals()
+
+ @Property
+ def highschool_themes_text(): #@NoSelf
+ def fget(self):
+ return "; ".join(self.highschool_themes_list)
+
+ return locals()
+
+ @Property
+ def town_text(): #@NoSelf
+ def fget(self):
+ return self.town.name if self.town else ""
+
+ return locals()
+
+ @Property
+ def tags_text(): #@NoSelf
+ def fget(self):
+ return "; ".join([t.label for t in self.tags.all()])
+
+ return locals()
+
+
+class TaggedSheet(models.Model):
+ datasheet = models.ForeignKey(Datasheet)
+ tag = models.ForeignKey(Tag)
+ original_order = models.IntegerField(null=False, blank=False, default=0)
+ order = models.IntegerField(null=False, blank=False, default=0, db_index=True)
+ index_note = models.FloatField(null=False, blank=False, default=0.0, db_index=True)
+
+
--- a/web/hdabo/settings.py Mon Jun 20 15:49:22 2011 +0200
+++ b/web/hdabo/settings.py Wed Jun 22 01:00:47 2011 +0200
@@ -148,4 +148,6 @@
WIKIPEDIA_API_URL = "http://fr.wikipedia.org/w/api.php"
+DBPEDIA_URI_TEMPLATE = "http://dbpedia.org/resource/%s"
+
from hdabo.config import * #@UnusedWildImport
Binary file web/hdabo/static/hdabo/img/arrow_green_right.png has changed
--- a/web/hdabo/templates/partial/all_tags_table.html Mon Jun 20 15:49:22 2011 +0200
+++ b/web/hdabo/templates/partial/all_tags_table.html Wed Jun 22 01:00:47 2011 +0200
@@ -4,6 +4,7 @@
<th>label</th>
{% comment %}<th>original_label</th>{% endcomment %}
<th class="text_centered">Lien W</th>
+ <th class="text_centered">Lien D</th>
<th>Catégorie</th>
<th class="large_25 text_centered">Supprimer<br/>le lien W</th>
<th>Alias</th></tr>
@@ -17,7 +18,15 @@
<a href="{{tag.wikipedia_url}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/arrow_right.png" ></a>
{% else %}
<a href="http://fr.wikipedia.org/w/index.php?search={{tag.label}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/wikipedia_search.png" ></a>
- {% endif %}</td>
+ {% endif %}
+ </td>
+ <td class="text_centered">
+ {% if tag.dbpedia_uri and tag.dbpedia_uri != "" %}
+ <a href="{{tag.dbpedia_uri}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/arrow_green_right.png" ></a>
+ {% else %}
+
+ {% endif %}
+ </td>
<td class="tag_category" id="{{tag.id}}">{% if tag.category %}{{ tag.category }}{% endif %}</td>
<td class="text_centered"><img src="{{STATIC_URL}}hdabo/img/red_cross.png" class="remove_wp_link" id="{{tag.id}}" alt="{{tag.label}}" /></td>
<td class="tag_alias" id="{{tag.id}}" >{% if tag.alias %}{{tag.alias}}{% endif %}</td></tr>
--- a/web/hdabo/templates/partial/tag_table.html Mon Jun 20 15:49:22 2011 +0200
+++ b/web/hdabo/templates/partial/tag_table.html Wed Jun 22 01:00:47 2011 +0200
@@ -8,6 +8,7 @@
<th>label</th>
{% comment %}<th>original_label</th>{% endcomment %}
<th class="text_centered">Lien W</th>
+ <th class="text_centered">Lien D</th>
<th>Catégorie</th>
<th class="large_25 text_centered">Supprimer<br/>le lien W</th>
<th>Alias</th>
@@ -28,7 +29,15 @@
<a href="{{t.tag.wikipedia_url}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/arrow_right.png" ></a>
{% else %}
<a href="http://fr.wikipedia.org/w/index.php?search={{t.tag.label}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/wikipedia_search.png" ></a>
- {% endif %}</td>
+ {% endif %}
+ </td>
+ <td class="text_centered">
+ {% if tag.dbpedia_uri and tag.dbpedia_uri != "" %}
+ <a href="{{tag.dbpedia_uri}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/arrow_green_right.png" ></a>
+ {% else %}
+
+ {% endif %}
+ </td>
<td>{% if t.tag.category %}{{ t.tag.category }}{% endif %}</td>
<td class="text_centered"><img src="{{STATIC_URL}}hdabo/img/red_cross.png" class="remove_wp_link" id="{{t.tag.id}}" alt="{{t.tag.label}}" /></td>
<td>{% if t.tag.alias %}{{t.tag.alias}}{% endif %}</td>
@@ -50,7 +59,15 @@
<a href="{{t.wikipedia_url}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/arrow_right.png" ></a>
{% else %}
<a href="http://fr.wikipedia.org/w/index.php?search={{t.label}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/wikipedia_search.png" ></a>
- {% endif %}</td>
+ {% endif %}
+ </td>
+ <td class="text_centered">
+ {% if tag.dbpedia_uri and tag.dbpedia_uri != "" %}
+ <a href="{{tag.dbpedia_uri}}" target="_blank"><img src="{{STATIC_URL}}hdabo/img/arrow_green_right.png" ></a>
+ {% else %}
+
+ {% endif %}
+ </td>
<td>{% if t.category %}{{ t.category }}{% endif %}</td>
<td class="text_centered"><img src="{{STATIC_URL}}hdabo/img/red_cross.png" class="remove_wp_link" id="{{t.id}}" /></td>
<td>{{t.alias}}</td>
--- a/web/hdabo/views.py Mon Jun 20 15:49:22 2011 +0200
+++ b/web/hdabo/views.py Wed Jun 22 01:00:47 2011 +0200
@@ -4,15 +4,15 @@
from django.contrib.auth.decorators import login_required #@UnusedImport
from django.core.paginator import Paginator
from django.db.models import Max
-from django.http import HttpResponse, HttpResponseBadRequest
+from django.http import HttpResponseBadRequest
from django.shortcuts import render_to_response, redirect
from django.template import RequestContext
from haystack.constants import DJANGO_ID
from haystack.query import SearchQuerySet
-from hdabo.management.commands.querywikipedia import process_tag
-from hdabo.wp_utils import (normalize_tag, query_wikipedia_title,
+from hdabo.wp_utils import process_tag
+from hdabo.utils import OrderedDict
+from hdabo.wp_utils import (normalize_tag, query_wikipedia_title,
get_or_create_tag)
-from hdabo.utils import OrderedDict
from models import Datasheet, Organisation, Tag, TagCategory, TaggedSheet
from wikitools import wiki
import django.utils.simplejson as json
@@ -22,7 +22,7 @@
#@login_required
def home(request):
- # Get all organisations
+ # Get all organizations
orgas = Organisation.objects.all().order_by('name')
# Count all validated, unvalidated sheets for each organisation
org_list = []
@@ -132,7 +132,6 @@
# NB : it is different from the TagSheet.order in the database.
new_order = int(request.POST["new_order"]) - 1
old_order = int(request.POST["old_order"]) - 1
- s = "new_order = " + str(new_order) + ", old_order = " + str(old_order)
# First we get the datasheet's TaggedSheets (list to force evaluation)
ordered_tags = list(TaggedSheet.objects.filter(datasheet=Datasheet.objects.get(id=ds_id)).order_by('order'))
# We change the moved TaggedSheets's order
@@ -143,16 +142,19 @@
# We move the TaggedSheets's order
if new_order > old_order :
# And we decrease the other ones
- for i in range(old_order+1,new_order+1) :
+ for i in range(old_order + 1, new_order + 1) :
ts = ordered_tags[i]
ts.order = ts.order - 1
ts.save()
else :
# And we increase the other ones
- for i in range(new_order,old_order) :
+ for i in range(new_order, old_order) :
ts = ordered_tags[i]
ts.order = ts.order + 1
ts.save()
+ ds = Datasheet.objects.get(id=ds_id)
+ ds.manual_order = True
+ ds.save()
return get_tag_table(request=request, ds_id=ds_id, valid=0)
@@ -205,6 +207,10 @@
ts = ds_tags.filter(tag=Tag.objects.filter(id=tag_id))[0]
ts.delete()
+ ds = Datasheet.objects.get(id=ds_id)
+ ds.manual_order = True
+ ds.save()
+
return get_tag_table(request=request, ds_id=ds_id, valid=0)
@@ -218,23 +224,18 @@
if tag.label != tag_label:
-
tag.label = tag_label
site = wiki.Wiki(settings.WIKIPEDIA_API_URL) #@UndefinedVariable
- new_label, status, url, pageid, response = query_wikipedia_title(site, tag_label) #@UnusedVariable
+ wp_res = query_wikipedia_title(site, tag_label)
+ status, url, pageid, dbpedia_uri = (wp_res['status'], wp_res['wikipedia_url'], wp_res['page_id'], wp_res["dbpedia_uri"])
if status is not None:
tag.url_status = status
- if url is not None:
- tag.wikipedia_url = url
- else:
- tag.wikipedia_url = None
-
- if pageid is not None:
- tag.wikipedia_pageid = pageid
- else:
- tag.wikipedia_pageid = None
+
+ tag.wikipedia_url = url
+ tag.wikipedia_pageid = pageid
+ tag.dbpedia_uri = dbpedia_uri
tag.save()
@@ -246,7 +247,7 @@
tag_id = request.POST["id"]
tag_label = request.POST["value"]
- ds_id=request.POST["datasheet_id"]
+ ds_id = request.POST["datasheet_id"]
tag = Tag.objects.get(id=tag_id)
@@ -268,6 +269,8 @@
ts.save()
+ ds.manual_order = True
+ ds.save()
return get_tag_table(request=request, ds_id=ds_id, valid=0)
@@ -313,10 +316,12 @@
# if the tag is created or if the tag is not in the list
list_ts = TaggedSheet.objects.filter(datasheet=ds)
- if created or len(list_ts.filter(tag=tag))==0 :
+ if created or len(list_ts.filter(tag=tag)) == 0 :
new_order = list_ts.aggregate(Max('order'))['order__max'] + 1
ts = TaggedSheet.objects.create(datasheet=ds, tag=tag, original_order=new_order, order=new_order)
ts.save()
+ ds.manual_order = True
+ ds.save()
return get_tag_table(request=request, ds_id=ds_id, valid=0)
@@ -347,9 +352,15 @@
else :
valid = False
# We validate or unvalidate the requester datasheet
+
+ if request.user.is_authenticated():
+ user = request.user
+ else:
+ user = None
+
ds = Datasheet.objects.get(id=ds_id)
if valid :
- ds.validate(None)
+ ds.validate(user)
else :
ds.unvalidate()
ds.save()
@@ -361,7 +372,7 @@
else :
# We ask to display the validated ds
valid_req = 1
- if len(same_organisation_ds)>0 :
+ if len(same_organisation_ds) > 0 :
return redirect('list_for_orga', orga_id=ds.organisation.id, valid=valid_req)
else :
return redirect('home')
@@ -407,4 +418,4 @@
# This function is available only in all_tags_table context
return get_all_tags_table(request=request, num_page=request.POST["num_page"], nb_by_page=request.POST["nb_by_page"])
-
\ No newline at end of file
+
--- a/web/hdabo/wp_utils.py Mon Jun 20 15:49:22 2011 +0200
+++ b/web/hdabo/wp_utils.py Wed Jun 22 01:00:47 2011 +0200
@@ -2,6 +2,20 @@
from django.conf import settings
from hdabo.models import Tag
from wikitools import api, wiki
+from django.utils.http import urlquote
+
+def normalize_tag(tag):
+ if len(tag) == 0:
+ return tag
+ tag = tag.strip()
+ tag = tag.replace("_", " ")
+ tag = " ".join(tag.split())
+ tag = tag[0].upper() + tag[1:]
+ return tag
+
+def urlize_for_wkipedia(label):
+ return urlquote(label.replace(" ","_"))
+
def __is_homonymie(page_dict):
for cat in page_dict.get(u"categories", []):
@@ -11,21 +25,22 @@
def query_wikipedia_title(site, label):
- params = {'action':'query', 'titles': label, 'prop':'info|categories', 'inprop':'url'}
+ params = {'action':'query', 'titles': label, 'prop':'info|categories|langlinks', 'inprop':'url', 'lllimit':'500', 'cllimit':'500'}
wpquery = api.APIRequest(site, params) #@UndefinedVariable
- response = wpquery.query()
+ response = wpquery.query()
+ original_response = response
query_dict = response['query']
# get page if multiple pages or none -> return Tag.null_result
pages = query_dict.get("pages", {})
if len(pages) > 1 or len(pages) == 0:
- return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None, response
+ return { 'new_label': None, 'status': Tag.TAG_URL_STATUS_DICT["null_result"], 'wikipedia_url': None, 'pageid': None, 'dbpedia_uri': None, 'response': response }
page = pages.values()[0]
if u"invalid" in page or u"missing" in page:
- return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None, response
+ return { 'new_label': None, 'status': Tag.TAG_URL_STATUS_DICT["null_result"], 'wikipedia_url': None, 'pageid': None, 'dbpedia_uri': None, 'response': response }
url = page.get(u'fullurl', None)
pageid = page.get(u'pageid', None)
@@ -37,17 +52,37 @@
status = Tag.TAG_URL_STATUS_DICT["redirection"]
else:
status = Tag.TAG_URL_STATUS_DICT["match"]
-
- return new_label, status, url, pageid, response
+
+ if status == Tag.TAG_URL_STATUS_DICT["redirection"]:
+ params = {'action':'query', 'titles': label, 'prop':'info|categories|langlinks', 'inprop':'url', 'lllimit':'500', 'cllimit':'500', 'redirects':True}
+ wpquery = api.APIRequest(site, params) #@UndefinedVariable
+ response = wpquery.query()
+ query_dict = response['query']
+ pages = query_dict.get("pages", {})
+            # the initial query already returned at least one page, so an
+            # empty/multiple result here is unexpected and treated as null_result
+ if len(pages) > 1 or len(pages) == 0:
+ return { 'new_label': None, 'status': Tag.TAG_URL_STATUS_DICT["null_result"], 'wikipedia_url': None, 'pageid': None, 'dbpedia_uri': None, 'response': response }
+ page = pages.values()[0]
+
-def normalize_tag(tag):
- if len(tag) == 0:
- return tag
- tag = tag.strip()
- tag = tag.replace("_", " ")
- tag = " ".join(tag.split())
- tag = tag[0].upper() + tag[1:]
- return tag
+
+    # scan the langlinks of the page to extract the English label (used for the DBpedia URI)
+ english_label = None
+
+ if status == Tag.TAG_URL_STATUS_DICT['match'] or status == Tag.TAG_URL_STATUS_DICT['redirection']:
+ lang_links = page.get('langlinks', [])
+ for lang_info_dict in lang_links:
+ if lang_info_dict['lang'] == "en":
+ english_label = lang_info_dict["*"]
+ break
+
+ if english_label and "#" not in english_label:
+ dbpedia_uri = settings.DBPEDIA_URI_TEMPLATE % (urlize_for_wkipedia(english_label))
+ else:
+ dbpedia_uri = None
+
+ return { 'new_label': new_label, 'status': status, 'wikipedia_url': url, 'pageid': pageid, 'dbpedia_uri': dbpedia_uri, 'response': original_response }
+
def get_or_create_tag(tag_label):
@@ -60,25 +95,41 @@
if created:
site = wiki.Wiki(settings.WIKIPEDIA_API_URL) #@UndefinedVariable
- new_label, status, url, pageid, response = query_wikipedia_title(site, tag_label_normalized) #@UnusedVariable
+ wp_res = query_wikipedia_title(site, tag_label_normalized) #@UnusedVariable
+ new_label, status, url, pageid, dbpedia_uri = wp_res['new_label'], wp_res['status'], wp_res['wikipedia_url'], wp_res['pageid'], wp_res["dbpedia_uri"]
+
# We save the datas
if new_label is not None:
tag.label = new_label
if status is not None:
tag.url_status = status
- if url is not None:
- tag.wikipedia_url = url
- else:
- tag.wikipedia_url = None
-
- if pageid is not None:
- tag.wikipedia_pageid = pageid
- else:
- tag.wikipedia_pageid = None
+ tag.wikipedia_url = url
+ tag.wikipedia_pageid = pageid
+ tag.dbpedia_uri = dbpedia_uri
tag.save()
return tag, created
+def process_tag(site, tag, verbosity):
+ wp_res = query_wikipedia_title(site, tag.label)
+ new_label, status, url, pageid, response, dbpedia_uri = wp_res['new_label'], wp_res['status'], wp_res['wikipedia_url'], wp_res['pageid'], wp_res['response'], wp_res["dbpedia_uri"]
+
+ if verbosity >= 2 :
+ print "response from query to %s with parameters %s :" % (site.apibase, repr(new_label))
+ print repr(response)
+
+ if new_label is not None:
+ tag.label = new_label
+ if status is not None:
+ tag.url_status = status
+ tag.wikipedia_url = url
+ tag.wikipedia_pageid = pageid
+ tag.dbpedia_uri = dbpedia_uri
+
+ tag.save()
+
+
+