Added Label corrections
author veltr
Mon, 14 Oct 2013 17:59:15 +0200
changeset 151 ca46b8e1b717
parent 150 c4c2f3be5ddd
child 152 766fed94b3b5
Added Label corrections
.settings/org.eclipse.core.resources.prefs
data/extra-labels.csv
src/jocondelab/management/commands/get_notice_years.py
--- a/.settings/org.eclipse.core.resources.prefs	Mon Oct 14 17:39:06 2013 +0200
+++ b/.settings/org.eclipse.core.resources.prefs	Mon Oct 14 17:59:15 2013 +0200
@@ -11,6 +11,7 @@
 encoding//src/jocondelab/management/commands/import_csv.py=utf-8
 encoding//src/jocondelab/management/commands/import_dbpedia_fields.py=utf-8
 encoding//src/jocondelab/management/commands/import_dbpedia_geo_years.py=utf-8
+encoding//src/jocondelab/management/commands/import_extra_labels.py=utf-8
 encoding//src/jocondelab/management/commands/import_extra_years.py=utf-8
 encoding//src/jocondelab/management/commands/import_skos.py=utf-8
 encoding//src/jocondelab/management/commands/import_term_labels.py=utf-8
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/extra-labels.csv	Mon Oct 14 17:59:15 2013 +0200
@@ -0,0 +1,10 @@
+"dbpedia_uri";"language_code";"label"
+"http://dbpedia.org/resource/Tetricus_I";"fr";"Tetricus Ier"
+"http://dbpedia.org/resource/Daniel_Chodowiecki";"fr";"Daniel Chodowiecki"
+"http://fr.dbpedia.org/resource/Cl%C3%A9op%C3%A2tre_VII";"fr";"Cléopâtre VII"
+"http://fr.dbpedia.org/resource/Confucius";"fr";"Confucius"
+"http://fr.dbpedia.org/resource/Horace";"fr";"Horace"
+"http://fr.dbpedia.org/resource/Ptol%C3%A9m%C3%A9e_XIII";"fr";"Ptolémée XIII"
+"http://fr.dbpedia.org/resource/Saint-%C3%89tienne";"fr";"Saint-Étienne"
+"http://fr.dbpedia.org/resource/Virgile";"fr";"Virgile"
+"http://fr.dbpedia.org/resource/Alexandre_le_Grand";"fr";"Alexandre le Grand"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jocondelab/management/commands/get_notice_years.py	Mon Oct 14 17:59:15 2013 +0200
@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+from django.core.management.base import BaseCommand
+from core.models import (Notice, Thesaurus)
+import re
+
+class Command(BaseCommand):
+        
+    def handle(self, *args, **options):
+        
+        millcache = {}
+        pericache = {}
+        yearre = re.compile("\d+")
+        rejectre = re.compile("\d-\d")
+        beforre = re.compile("av(\.|ant)? JC|- ", re.I)
+        splitre = re.compile("\s*[,;]\s*")
+        sieclere = re.compile("si..?cle", re.I)
+        millenairere = re.compile("mill..?naire",re.I)
+        moitiere = re.compile("moiti", re.I)
+        quartre = re.compile("quart", re.I)
+        
+        def getyear(millesime):
+            year = None
+            if not rejectre.search(millesime):
+                yearmatch = yearre.search(millesime)
+                if yearmatch:
+                    year = int(millesime[yearmatch.start():yearmatch.end()])
+                    if beforre.search(millesime):
+                        year = - year
+                    if year > 2012:
+                        year = None
+            if year is None:
+                print '!!!!', millesime
+            millcache[millesime] = year
+            return year
+        
+        def getperiod(p):
+            duration = 0
+            counts = yearre.findall(p)
+            res = []
+            avjc = False
+            if len(counts):
+                if millenairere.search(p):
+                    duration = 1000
+                    avjc = True #Les millénaires positifs ne sont jamais utilisés, les négatifs ne sont pas toujours précisés
+                if sieclere.search(p):
+                    duration = 100
+                    nb = int(counts[-1])
+                if beforre.search(p):
+                    avjc = True
+                start = duration * (nb - 1)
+                if avjc:
+                    start = - start - duration
+                partial = False
+                if moitiere.search(p):
+                    partial = True
+                    duration /= 2
+                if quartre.search(p):
+                    partial = True
+                    duration /= 4
+                if len(counts) > 1 and not partial:
+                    print '!!!!', p
+                    return res
+                if len(counts) == 1 and partial:
+                    print '!!!!', p
+                    return res
+                if partial:
+                    nb = int(counts[0])
+                    start += duration * (nb - 1)
+                if not avjc:
+                    start += 1
+                end = start + duration - 1
+                res = [start, end]
+            return res
+        
+        qs = Notice.objects.iterator()
+        for notice in qs:
+            millfield = notice.mill
+            years = []
+            if millfield:
+                print millfield 
+                millesimes = splitre.split(millfield)
+                years = [millcache[m] if m in millcache else getyear(m) for m in millesimes]
+                years = [y for y in years if y is not None]
+            perifield = notice.peri
+            if not len(years) and perifield:
+                print perifield
+                periodes = splitre.split(perifield)
+                for p in periodes:
+                    if not p in pericache:
+                        pericache[p] = getperiod(p)
+                    years += pericache[p]
+            if len(years):
+                print '      ---->', min(years), max(years)
+        
\ No newline at end of file