# HG changeset patch # User veltr # Date 1381766355 -7200 # Node ID ca46b8e1b717d3a386772515ffb777db265d9035 # Parent c4c2f3be5ddda08e79408b5121cd671e6d35ea04 Added Label corrections diff -r c4c2f3be5ddd -r ca46b8e1b717 .settings/org.eclipse.core.resources.prefs --- a/.settings/org.eclipse.core.resources.prefs Mon Oct 14 17:39:06 2013 +0200 +++ b/.settings/org.eclipse.core.resources.prefs Mon Oct 14 17:59:15 2013 +0200 @@ -11,6 +11,7 @@ encoding//src/jocondelab/management/commands/import_csv.py=utf-8 encoding//src/jocondelab/management/commands/import_dbpedia_fields.py=utf-8 encoding//src/jocondelab/management/commands/import_dbpedia_geo_years.py=utf-8 +encoding//src/jocondelab/management/commands/import_extra_labels.py=utf-8 encoding//src/jocondelab/management/commands/import_extra_years.py=utf-8 encoding//src/jocondelab/management/commands/import_skos.py=utf-8 encoding//src/jocondelab/management/commands/import_term_labels.py=utf-8 diff -r c4c2f3be5ddd -r ca46b8e1b717 data/extra-labels.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data/extra-labels.csv Mon Oct 14 17:59:15 2013 +0200 @@ -0,0 +1,10 @@ +"dbpedia_uri";"language_code";"label" +"http://dbpedia.org/resource/Tetricus_I";"fr";"Tetricus Ier" +"http://dbpedia.org/resource/Daniel_Chodowiecki";"fr";"Daniel Chodowiecki" +"http://fr.dbpedia.org/resource/Cl%C3%A9op%C3%A2tre_VII";"fr";"Cléopâtre VII" +"http://fr.dbpedia.org/resource/Confucius";"fr";"Confucius" +"http://fr.dbpedia.org/resource/Horace";"fr";"Horace" +"http://fr.dbpedia.org/resource/Ptol%C3%A9m%C3%A9e_XIII";"fr";"Ptolémée XIII" +"http://fr.dbpedia.org/resource/Saint-%C3%89tienne";"fr";"Saint-Étienne" +"http://fr.dbpedia.org/resource/Virgile";"fr";"Virgile" +"http://fr.dbpedia.org/resource/Alexandre_le_Grand";"fr";"Alexandre le Grand" diff -r c4c2f3be5ddd -r ca46b8e1b717 src/jocondelab/management/commands/get_notice_years.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/jocondelab/management/commands/get_notice_years.py Mon Oct 14 
# -*- coding: utf-8 -*-
"""Management command printing the year range found on each ``Notice``.

For every notice, the ``mill`` field is split on commas/semicolons and each
piece is parsed as a single year.  When no year could be extracted, the
``peri`` field is parsed instead as century / millennium expressions
(optionally narrowed to a half or a quarter).  The command only prints what
it finds; nothing is written back to the database.
"""
import re

from django.core.management.base import BaseCommand
from core.models import (Notice, Thesaurus)

# Years strictly above this value are treated as parse errors.
_MAX_YEAR = 2012

_YEAR_RE = re.compile(r"\d+")
# A digit, a hyphen and a digit with no spaces: the value is a range, not a
# single year, and is rejected by _parse_year.
_RANGE_RE = re.compile(r"\d-\d")
# NOTE(review): the "- " alternative also matches spaced ranges such as
# "1850 - 1860", which are then read as a negative year -- confirm intent.
_BEFORE_CHRIST_RE = re.compile(r"av(\.|ant)? JC|- ", re.I)
_SPLIT_RE = re.compile(r"\s*[,;]\s*")
# "..?" stands for the accented letter; presumably it allows for the letter
# being either one character or a two-byte UTF-8 sequence -- TODO confirm.
_CENTURY_RE = re.compile(r"si..?cle", re.I)
_MILLENNIUM_RE = re.compile(r"mill..?naire", re.I)
_HALF_RE = re.compile(r"moiti", re.I)
_QUARTER_RE = re.compile(r"quart", re.I)


def _report_unparsed(text):
    """Print *text* prefixed with ``!!!!`` to flag a value that was rejected."""
    print("!!!! %s" % text)


def _parse_year(millesime):
    """Return the year contained in *millesime*, or ``None`` if unusable.

    The first run of digits is taken as the year and negated when a
    "before Christ" marker is present.  Values that look like a range
    (``\\d-\\d``), contain no digits, or exceed ``_MAX_YEAR`` are reported
    with ``_report_unparsed`` and yield ``None``.
    """
    year = None
    if not _RANGE_RE.search(millesime):
        match = _YEAR_RE.search(millesime)
        if match:
            year = int(match.group())
            if _BEFORE_CHRIST_RE.search(millesime):
                year = -year
            if year > _MAX_YEAR:
                year = None
    if year is None:
        _report_unparsed(millesime)
    return year


def _parse_period(period):
    """Return ``[first_year, last_year]`` for *period*, or ``[]``.

    The last number in *period* is the rank of the century or millennium.
    When the text mentions a half ("moiti...") or a quarter ("quart"), the
    first number is the rank of that part inside the century/millennium, so
    exactly two numbers are expected; otherwise exactly one.  Any other
    combination is reported with ``_report_unparsed`` and yields ``[]``.
    A period without any number yields ``[]`` silently.
    """
    counts = _YEAR_RE.findall(period)
    if not counts:
        return []

    duration = 0
    before_christ = False
    if _MILLENNIUM_RE.search(period):
        duration = 1000
        # Positive millennia are never used in the data and negative ones
        # are not always marked, so a millennium is always taken as BC.
        before_christ = True
    if _CENTURY_RE.search(period):
        duration = 100
    if not duration:
        # Neither a century nor a millennium: with a zero duration the
        # arithmetic below would produce a meaningless [1, 0] range.
        _report_unparsed(period)
        return []
    if _BEFORE_CHRIST_RE.search(period):
        before_christ = True

    rank = int(counts[-1])
    start = duration * (rank - 1)
    if before_christ:
        # Mirror around year 0: the Nth unit BC starts N * duration before.
        start = -start - duration

    partial = False
    if _HALF_RE.search(period):
        partial = True
        duration //= 2  # floor division keeps years integral on Python 3
    if _QUARTER_RE.search(period):
        partial = True
        duration //= 4

    # A part needs its own rank (two numbers); a whole unit needs only one.
    if partial != (len(counts) > 1):
        _report_unparsed(period)
        return []

    if partial:
        part_rank = int(counts[0])
        start += duration * (part_rank - 1)
    if not before_christ:
        # AD units start on year 1 (e.g. the 19th century is 1801-1900).
        start += 1
    return [start, start + duration - 1]


class Command(BaseCommand):
    """Print, for each notice, the raw date fields and the derived year span."""

    def handle(self, *args, **options):
        """Iterate over all notices and print the min/max year of each.

        Parsed values are memoised per distinct string, so a rejected value
        is reported only the first time it is met.
        """
        year_cache = {}
        period_cache = {}

        for notice in Notice.objects.iterator():
            years = []

            millfield = notice.mill
            if millfield:
                print(millfield)
                for millesime in _SPLIT_RE.split(millfield):
                    if millesime not in year_cache:
                        year_cache[millesime] = _parse_year(millesime)
                    year = year_cache[millesime]
                    if year is not None:
                        years.append(year)

            # The period field is only a fallback for notices without a year.
            perifield = notice.peri
            if not years and perifield:
                print(perifield)
                for period in _SPLIT_RE.split(perifield):
                    if period not in period_cache:
                        period_cache[period] = _parse_period(period)
                    years += period_cache[period]

            if years:
                print(" ----> %s %s" % (min(years), max(years)))