Added Geographic Inclusion in the Django version (server only)
author veltr
Wed, 22 Feb 2012 18:55:35 +0100
changeset 122 fde8335a037c
parent 121 f316ba5856fe
child 123 94fc5f5b5cfd
Added Geographic Inclusion in the Django version (server only)
.hgignore
.settings/org.eclipse.core.resources.prefs
virtualenv/res/lib/lib_create_env.py
virtualenv/res/src/SPARQLWrapper-1.5.0.tar.gz
virtualenv/web/res/res_create_env.py
web/hdalab/.htaccess
web/hdalab/management/commands/fill_tag_years.py
web/hdalab/management/commands/query_geo_inclusion.py
web/hdalab/migrations/0007_geographic_inclusion.py
web/hdalab/models/__init__.py
web/hdalab/models/dataviz.py
web/hdalab/views/ajax.py
--- a/.hgignore	Fri Feb 17 13:07:49 2012 +0100
+++ b/.hgignore	Wed Feb 22 18:55:35 2012 +0100
@@ -20,4 +20,8 @@
 syntax: regexp
 \.sh$
 syntax: regexp
-^web/hdalab/config\.py$
\ No newline at end of file
+^web/hdalab/config\.py$
+syntax: regexp
+^web/hdalab/\.htaccess$
+syntax: regexp
+^web/hdalab/\.htaccess$
\ No newline at end of file
--- a/.settings/org.eclipse.core.resources.prefs	Fri Feb 17 13:07:49 2012 +0100
+++ b/.settings/org.eclipse.core.resources.prefs	Wed Feb 22 18:55:35 2012 +0100
@@ -1,40 +1,43 @@
-#Wed Feb 15 16:29:39 CET 2012
-eclipse.preferences.version=1
-encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/haystack/backends/__init__.py=utf-8
-encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/fields.py=utf-8
-encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/forms.py=utf-8
-encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/tests.py=utf-8
-encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/whoosh/analysis.py=utf8
-encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/api.py=utf-8
-encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/wiki.py=utf-8
-encoding//web/hdabo/forms.py=utf-8
-encoding//web/hdabo/management/commands/import_csv.py=utf-8
-encoding//web/hdabo/management/commands/import_tag_popularity.py=utf-8
-encoding//web/hdabo/management/commands/query_wikipedia.py=utf-8
-encoding//web/hdabo/migrations/0001_initial.py=utf-8
-encoding//web/hdabo/migrations/0002_backport_hdabo_sf.py=utf-8
-encoding//web/hdabo/migrations/0003_update_redirection.py=utf-8
-encoding//web/hdabo/models.py=utf-8
-encoding//web/hdabo/search/french_whoosh_backend.py=utf-8
-encoding//web/hdabo/tests/models.py=utf-8
-encoding//web/hdabo/utils.py=utf-8
-encoding//web/hdabo/views.py=utf-8
-encoding//web/hdabo/wp_utils.py=utf-8
-encoding//web/hdalab/__init__.py=utf-8
-encoding//web/hdalab/config.py=utf-8
-encoding//web/hdalab/manage.py=utf-8
-encoding//web/hdalab/management/commands/export_tags_csv.py=utf-8
-encoding//web/hdalab/management/commands/export_wpcategory_csv.py=utf-8
-encoding//web/hdalab/management/commands/fill_country_codes.py=utf-8
-encoding//web/hdalab/management/commands/fill_tag_years.py=utf-8
-encoding//web/hdalab/management/commands/query_dbpedia.py=utf-8
-encoding//web/hdalab/management/commands/query_wikipedia_category.py=utf-8
-encoding//web/hdalab/management/utils.py=utf-8
-encoding//web/hdalab/migrations/0001_initial.py=utf-8
-encoding//web/hdalab/migrations/0002_dataviz.py=utf-8
-encoding//web/hdalab/migrations/0003_hdasession.py=utf-8
-encoding//web/hdalab/migrations/0005_rename_end_year.py=utf-8
-encoding//web/hdalab/models/dataviz.py=utf-8
-encoding//web/hdalab/settings.py=utf-8
-encoding//web/hdalab/urls.py=utf-8
-encoding//web/hdalab/views/ajax.py=utf-8
+#Wed Feb 22 17:20:19 CET 2012
+eclipse.preferences.version=1
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/haystack/backends/__init__.py=utf-8
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/fields.py=utf-8
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/forms.py=utf-8
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/tests.py=utf-8
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/whoosh/analysis.py=utf8
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/api.py=utf-8
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/wiki.py=utf-8
+encoding//web/hdabo/forms.py=utf-8
+encoding//web/hdabo/management/commands/import_csv.py=utf-8
+encoding//web/hdabo/management/commands/import_tag_popularity.py=utf-8
+encoding//web/hdabo/management/commands/query_wikipedia.py=utf-8
+encoding//web/hdabo/migrations/0001_initial.py=utf-8
+encoding//web/hdabo/migrations/0002_backport_hdabo_sf.py=utf-8
+encoding//web/hdabo/migrations/0003_update_redirection.py=utf-8
+encoding//web/hdabo/models.py=utf-8
+encoding//web/hdabo/search/french_whoosh_backend.py=utf-8
+encoding//web/hdabo/tests/models.py=utf-8
+encoding//web/hdabo/utils.py=utf-8
+encoding//web/hdabo/views.py=utf-8
+encoding//web/hdabo/wp_utils.py=utf-8
+encoding//web/hdalab/__init__.py=utf-8
+encoding//web/hdalab/config.py=utf-8
+encoding//web/hdalab/manage.py=utf-8
+encoding//web/hdalab/management/commands/export_tags_csv.py=utf-8
+encoding//web/hdalab/management/commands/export_wpcategory_csv.py=utf-8
+encoding//web/hdalab/management/commands/fill_country_codes.py=utf-8
+encoding//web/hdalab/management/commands/fill_tag_years.py=utf-8
+encoding//web/hdalab/management/commands/query_dbpedia.py=utf-8
+encoding//web/hdalab/management/commands/query_geo_inclusion.py=utf-8
+encoding//web/hdalab/management/commands/query_wikipedia_category.py=utf-8
+encoding//web/hdalab/management/utils.py=utf-8
+encoding//web/hdalab/migrations/0001_initial.py=utf-8
+encoding//web/hdalab/migrations/0002_dataviz.py=utf-8
+encoding//web/hdalab/migrations/0003_hdasession.py=utf-8
+encoding//web/hdalab/migrations/0005_rename_end_year.py=utf-8
+encoding//web/hdalab/migrations/0006_dbpedia_fields_one_to_one.py=utf-8
+encoding//web/hdalab/migrations/0007_geographic_inclusion.py=utf-8
+encoding//web/hdalab/models/dataviz.py=utf-8
+encoding//web/hdalab/settings.py=utf-8
+encoding//web/hdalab/urls.py=utf-8
+encoding//web/hdalab/views/ajax.py=utf-8
--- a/virtualenv/res/lib/lib_create_env.py	Fri Feb 17 13:07:49 2012 +0100
+++ b/virtualenv/res/lib/lib_create_env.py	Wed Feb 22 18:55:35 2012 +0100
@@ -23,6 +23,7 @@
     'WHOOSH' : { 'setup': 'Whoosh', 'url': 'https://bitbucket.org/mchaput/whoosh/get/tip.tar.bz2', 'local': 'whoosh-1.8.3.tar.bz2'},
     'WIKITOOLS' : { 'setup': 'wikitools', 'url': 'wikitools.tar.bz2', 'local': 'wikitools.tar.bz2'},
     'RDFLIB' : { 'setup': 'rdflib', 'url': 'http://rdflib.googlecode.com/files/rdflib-3.2.0.tar.gz', 'local': 'rdflib-3.2.0.tar.gz'},
+    'SPARQLWRAPPER' : { 'setup': 'SPARQLWrapper', 'url': 'http://downloads.sourceforge.net/project/sparql-wrapper/sparql-wrapper-python/1.5.0/SPARQLWrapper-1.5.0.tar.gz', 'local' : 'SPARQLWrapper-1.5.0.tar.gz'},
 }
 
 if system_str == 'Windows':
Binary file virtualenv/res/src/SPARQLWrapper-1.5.0.tar.gz has changed
--- a/virtualenv/web/res/res_create_env.py	Fri Feb 17 13:07:49 2012 +0100
+++ b/virtualenv/web/res/res_create_env.py	Wed Feb 22 18:55:35 2012 +0100
@@ -35,6 +35,7 @@
     ('WHOOSH', 'pip', None, None),
     ('HAYSTACK', 'pip', None, None),
     ('WIKITOOLS', 'pip', None, None),
+    ('SPARQLWRAPPER', 'pip', None, None),
 ])
 
 if system_str == "Darwin":
--- a/web/hdalab/.htaccess	Fri Feb 17 13:07:49 2012 +0100
+++ b/web/hdalab/.htaccess	Wed Feb 22 18:55:35 2012 +0100
@@ -1,7 +1,7 @@
 
 SetEnv DJANGO_SETTINGS_MODULE hdalab.settings
-SetEnv PROJECT_PATH /Users/ymh/dev/workspace/hdabo/web
-SetEnv PYTHON_PATH /Users/ymh/dev/workspace/hdabo/virtualenv/web/env/hdabo/lib/python2.6/site-packages:/Applications/Dev/eclipse/plugins/org.python.pydev.debug_2.2.0.2011062419/pysrc/
+SetEnv PROJECT_PATH /home/raph/shared/hdabo/web
+SetEnv PYTHON_PATH /home/raph/venv_hdabo/env/lib/python2.7/site-packages
 SetEnv PYDEV_DEBUG False
 
 Options ExecCGI FollowSymLinks
@@ -13,7 +13,7 @@
 
 RewriteEngine On
 RewriteCond %{REQUEST_FILENAME} !-f
-RewriteRule ^(.*)$ /~ymh/hdabo/hdalab/modwsgi.wsgi/$1 [QSA,PT,L]
+RewriteRule ^(.*)$ /hda/hdalab/modwsgi.wsgi/$1 [QSA,PT,L]
 
 Header set Pragma "no-cache"
 Header set Cache-Control "no-cache"
--- a/web/hdalab/management/commands/fill_tag_years.py	Fri Feb 17 13:07:49 2012 +0100
+++ b/web/hdalab/management/commands/fill_tag_years.py	Wed Feb 22 18:55:35 2012 +0100
@@ -31,7 +31,7 @@
                 start = int(range[0][1])
                 end = start
                 if range[0][0]:
-                    end = start + 9
+                    end = start + 10
                 if range[0][2]:
                     end = int(range[0][2])
             else:
@@ -46,7 +46,7 @@
                             start = -100*val
                         else:
                             start = 1 + 100*(val-1)
-                        end = start + 99
+                        end = start + 100
             
             if start != None and end != None:
                 ty = TagYears.objects.create(tag=tag, start_year=start, end_year=end)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/management/commands/query_geo_inclusion.py	Wed Feb 22 18:55:35 2012 +0100
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Feb 22, 2012
+
+@author: raphv
+'''
+
+from django.core.management.base import NoArgsCommand
+from django.core.management.color import no_style
+from hdabo.utils import show_progress
+from hdabo.models import Tag
+from hdalab.models import Country, GeoInclusion
+from SPARQLWrapper import SPARQLWrapper, JSON
+import re
+
+class Command(NoArgsCommand):
+    def handle_noargs(self, **options):
+        self.style = no_style()
+
+        qs = Tag.objects.filter(category__label="Localisation").exclude(dbpedia_uri = None)
+        total = qs.count()
+        
+        endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
+        endpoint.setReturnFormat(JSON)
+        sparqltext = """
+            SELECT ?resource WHERE {
+             { <%s> ?resource <http://dbpedia.org/ontology/Country> . }
+            UNION
+             { <%s> <http://dbpedia.org/ontology/country> ?resource . }
+            }
+        """
+        resourceprefix = "http://dbpedia.org/resource/"
+        identityuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
+        
+        writer = None
+        
+        for i,tag in enumerate(qs):
+            endpoint.setQuery(sparqltext % (tag.dbpedia_uri, tag.dbpedia_uri))
+    
+            results = endpoint.query().convert()['results']['bindings']
+            
+            if len(results) == 1: # We don't want places located in multiple countries
+                
+                resourceuri = results[0]['resource']['value']
+                
+                if re.match(resourceprefix, resourceuri):
+                    countrytxt = re.findall('([^/]+$)', resourceuri)[0]
+                    
+                    country, created = Country.objects.get_or_create(dbpedia_uri=resourceuri)
+                    GeoInclusion.objects.get_or_create(tag=tag, country=country)
+                    
+                if resourceuri == identityuri:
+                    countrytxt = '<is a country>'
+                    
+                    country, created = Country.objects.get_or_create(dbpedia_uri=tag.dbpedia_uri)
+                    GeoInclusion.objects.get_or_create(tag=tag, country=country)
+                
+            else:
+                countrytxt = '<unknown>'
+            
+            writer = show_progress(i+1, total, '%s => %s'%(tag.label, countrytxt), 50, writer)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/migrations/0007_geographic_inclusion.py	Wed Feb 22 18:55:35 2012 +0100
@@ -0,0 +1,139 @@
+# encoding: utf-8
+import datetime
+from south.db import db
+from south.v2 import SchemaMigration
+from django.db import models
+
+class Migration(SchemaMigration):
+    """South schema migration replacing the CountryCode table with Country and GeoInclusion.
+
+    forwards() drops 'hdalab_countrycode' and creates 'hdalab_geoinclusion'
+    and 'hdalab_country'; backwards() does the reverse. Only the schema is
+    migrated: rows held in a dropped table are not copied anywhere.
+    """
+
+    def forwards(self, orm):
+        """Drop the CountryCode table, then create the GeoInclusion and Country tables."""
+        
+        # Deleting model 'CountryCode'
+        # The table is dropped outright; any rows it contains are lost.
+        db.delete_table('hdalab_countrycode')
+
+        # Adding model 'GeoInclusion'
+        # NOTE(review): this table references hdalab.Country, which is only
+        # created further down -- presumably South defers the foreign-key
+        # constraint until the end of the migration; confirm on the target DB.
+        db.create_table('hdalab_geoinclusion', (
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('tag', self.gf('django.db.models.fields.related.OneToOneField')(related_name='locatedin', unique=True, to=orm['hdabo.Tag'])),
+            ('country', self.gf('django.db.models.fields.related.ForeignKey')(related_name='includes', to=orm['hdalab.Country'])),
+        ))
+        db.send_create_signal('hdalab', ['GeoInclusion'])
+
+        # Adding model 'Country'
+        db.create_table('hdalab_country', (
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('dbpedia_uri', self.gf('django.db.models.fields.URLField')(unique=True, max_length=255, db_index=True)),
+        ))
+        db.send_create_signal('hdalab', ['Country'])
+
+
+    def backwards(self, orm):
+        """Recreate an empty CountryCode table and drop the GeoInclusion and Country tables."""
+        
+        # Adding model 'CountryCode'
+        # Only the table structure comes back; no rows are restored.
+        db.create_table('hdalab_countrycode', (
+            ('isocode', self.gf('django.db.models.fields.CharField')(max_length=3, unique=True, db_index=True)),
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('label', self.gf('django.db.models.fields.CharField')(max_length=1024, unique=True, db_index=True)),
+        ))
+        db.send_create_signal('hdalab', ['CountryCode'])
+
+        # Deleting model 'GeoInclusion'
+        # Dropped before Country, the table it references.
+        db.delete_table('hdalab_geoinclusion')
+
+        # Deleting model 'Country'
+        db.delete_table('hdalab_country')
+
+
+    # Frozen model definitions for this migration; forwards() and backwards()
+    # look models up through orm[...] (e.g. orm['hdabo.Tag']).
+    # Generated content -- presumably should not be edited by hand.
+    models = {
+        'hdabo.tag': {
+            'Meta': {'unique_together': "(('label', 'original_label', 'url_status'),)", 'object_name': 'Tag'},
+            'alias': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'alternative_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'alternative_wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}),
+            'alternative_wikipedia_url': ('django.db.models.fields.URLField', [], {'db_index': 'True', 'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagCategory']", 'null': 'True', 'blank': 'True'}),
+            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
+            'dbpedia_uri': ('django.db.models.fields.URLField', [], {'db_index': 'True', 'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
+            'normalized_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
+            'original_label': ('django.db.models.fields.CharField', [], {'max_length': '1024'}),
+            'popularity': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
+            'url_status': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'db_index': 'True', 'blank': 'True'}),
+            'wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}),
+            'wikipedia_url': ('django.db.models.fields.URLField', [], {'db_index': 'True', 'max_length': '2048', 'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.tagcategory': {
+            'Meta': {'object_name': 'TagCategory'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
+        },
+        'hdalab.country': {
+            'Meta': {'object_name': 'Country'},
+            'dbpedia_uri': ('django.db.models.fields.URLField', [], {'unique': 'True', 'max_length': '255', 'db_index': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
+        },
+        'hdalab.dbpediafields': {
+            'Meta': {'object_name': 'DbpediaFields'},
+            'abstract': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'dbpedia_uri': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'db_index': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'tag': ('django.db.models.fields.related.OneToOneField', [], {'blank': 'True', 'related_name': "'dbpedia_fields'", 'unique': 'True', 'null': 'True', 'to': "orm['hdabo.Tag']"}),
+            'thumbnail': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'})
+        },
+        'hdalab.geoinclusion': {
+            'Meta': {'object_name': 'GeoInclusion'},
+            'country': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'includes'", 'to': "orm['hdalab.Country']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'tag': ('django.db.models.fields.related.OneToOneField', [], {'related_name': "'locatedin'", 'unique': 'True', 'to': "orm['hdabo.Tag']"})
+        },
+        'hdalab.hdasession': {
+            'Meta': {'object_name': 'HdaSession'},
+            'data': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sessionid': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '36', 'db_index': 'True'})
+        },
+        'hdalab.infoboxparameter': {
+            'Meta': {'unique_together': "(('tag_infobox', 'param_name'),)", 'object_name': 'InfoboxParameter'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'param_name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+            'param_value': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'tag_infobox': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdalab.TagInfobox']"})
+        },
+        'hdalab.taginfobox': {
+            'Meta': {'unique_together': "(('tag', 'name', 'revision_id'),)", 'object_name': 'TagInfobox'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+            'revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'source': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'infoboxes'", 'to': "orm['hdabo.Tag']"})
+        },
+        'hdalab.taglinks': {
+            'Meta': {'object_name': 'TagLinks'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'object': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'taglinks_objects'", 'to': "orm['hdabo.Tag']"}),
+            'subject': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'taglinks_subjects'", 'to': "orm['hdabo.Tag']"})
+        },
+        'hdalab.tagwpcategory': {
+            'Meta': {'unique_together': "(('tag', 'wp_category', 'hidden'),)", 'object_name': 'TagWpCategory'},
+            'hidden': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'wp_categories'", 'to': "orm['hdabo.Tag']"}),
+            'wp_category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdalab.WpCategory']"})
+        },
+        'hdalab.tagyears': {
+            'Meta': {'object_name': 'TagYears'},
+            'end_year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'start_year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'years'", 'to': "orm['hdabo.Tag']"})
+        },
+        'hdalab.wpcategory': {
+            'Meta': {'object_name': 'WpCategory'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '2048'})
+        }
+    }
+
+    complete_apps = ['hdalab']
--- a/web/hdalab/models/__init__.py	Fri Feb 17 13:07:49 2012 +0100
+++ b/web/hdalab/models/__init__.py	Wed Feb 22 18:55:35 2012 +0100
@@ -1,5 +1,5 @@
 from hdalab.models.categories import (WpCategory, InfoboxParameter, TagInfobox, 
     TagWpCategory)
-from hdalab.models.dataviz import TagYears, CountryCode, TagLinks, DbpediaFields, HdaSession
+from hdalab.models.dataviz import TagYears, Country, GeoInclusion, TagLinks, DbpediaFields, HdaSession
 
-__all__ = ['WpCategory', 'InfoboxParameter', 'TagInfobox', 'TagWpCategory', 'TagYears', 'CountryCode', 'TagLinks', 'DbpediaFields', 'HdaSession']
\ No newline at end of file
+__all__ = ['WpCategory', 'InfoboxParameter', 'TagInfobox', 'TagWpCategory', 'TagYears', 'Country', 'GeoInclusion', 'TagLinks', 'DbpediaFields', 'HdaSession']
\ No newline at end of file
--- a/web/hdalab/models/dataviz.py	Fri Feb 17 13:07:49 2012 +0100
+++ b/web/hdalab/models/dataviz.py	Wed Feb 22 18:55:35 2012 +0100
@@ -26,10 +26,25 @@
     class Meta:
         app_label = 'hdalab'
 
-class CountryCode(models.Model):
+#class CountryCode(models.Model):
+#
+#    label = models.CharField(max_length=1024, unique=True, blank=False, null=False, db_index=True)
+#    isocode = models.CharField(max_length=3, unique=True, blank=False, null=False, db_index=True)
+#    
+#    class Meta:
+#        app_label = 'hdalab'
 
-    label = models.CharField(max_length=1024, unique=True, blank=False, null=False, db_index=True)
-    isocode = models.CharField(max_length=3, unique=True, blank=False, null=False, db_index=True)
+class Country(models.Model):
+    
+    dbpedia_uri = models.URLField(verify_exists=False, max_length=255, blank=False, null=False, db_index=True, unique=True)
+    
+    class Meta:
+        app_label = 'hdalab'
+
+class GeoInclusion(models.Model):
+    
+    tag = models.OneToOneField(Tag, related_name="locatedin", db_index=True)
+    country = models.ForeignKey(Country, blank=False, null=False, related_name="includes", db_index=True)
     
     class Meta:
         app_label = 'hdalab'
--- a/web/hdalab/views/ajax.py	Fri Feb 17 13:07:49 2012 +0100
+++ b/web/hdalab/views/ajax.py	Wed Feb 22 18:55:35 2012 +0100
@@ -9,7 +9,7 @@
 from django.db.models import Q, Count, Sum
 from django.http import HttpResponse
 from hdabo.models import Tag, Datasheet, TaggedSheet
-from hdalab.models import TagLinks, HdaSession, CountryCode, TagYears
+from hdalab.models import TagLinks, HdaSession, Country, GeoInclusion, TagYears
 import django.utils.simplejson as json
 import hmac
 import uuid
@@ -101,13 +101,10 @@
 
 
     if label or periode or contentlist :
-        
+
         qs = Datasheet.objects.filter(validated=True)
-        
-        #$globalsql = "SELECT id, title, description, url FROM hdabo_datasheet E";
 
         globalids = []
-        #$globalfilters = array();
         
         if periode:
             years = periode.split(",")
@@ -124,7 +121,7 @@
             
             globalids += [t.id for t in tagqs]
             
-            qs = qs.filter(taggedsheet__tag__in = tagqs) 
+            qs = qs.filter(taggedsheet__tag__in = tagqs)
             
         if label:
             for txtlbl in label.split(","):
@@ -138,95 +135,105 @@
         if contentlist:
             qs = qs.filter(id__in = contentlist.split(","))
         
-        if contentlist is None:
-            qs = qs.order_by('?')
+#        if contentlist is None:
+#            qs = qs.order_by('?')
         
+        qs = qs.distinct()
         
-        for content in qs:
-            cont_count += 1
-            contenus[content.id] = {'score' : 0, 'tags' : [], 'id':content.id, 'title': content.title, 'description': content.description, 'url': content.url}
+        contenus = dict([(content.id, {'score' : 0, 'tags' : [], 'id':content.id, 'title': content.title, 'description': content.description, 'url': content.url}) for content in qs])
 
-
-        countries = dict([ (cc.label, {'isocode':cc.isocode, 'score':0}) for cc in CountryCode.objects.all() ])
-        disciplines = dict([ (d.label, {'label':d.label, 'score':0}) for d in Tag.objects.filter(category__label = "Discipline artistique")])
+        #countries = dict([ (cc.label, {'isocode':cc.isocode, 'score':0}) for cc in CountryCode.objects.all() ])
+        #disciplines = dict([ (d.label, {'label':d.label, 'score':0}) for d in Tag.objects.filter(category__label = "Discipline artistique")])
+        
+        countries = {}
+        disciplines = {}
         
         tags = {}
         
         contentids = contenus.keys()
-        qs = TaggedSheet.objects.select_related('tag').filter(datasheet__in = contentids, order__lte = max_tag_order).order_by('order')
+        cont_count = len(contentids)
+        
+        qs = TaggedSheet.objects.select_related('tag','tag__category','tag__locatedin__country').filter(datasheet__in = contentids, order__lte = max_tag_order).order_by('order')
         for ts in qs:
             match_tag = ts.tag.id in globalids
-            contenus[ts.datasheet.id]['tags'].append({'id': ts.tag.id, 'label':ts.tag.label, 'order':ts.order, 'match': match_tag})
+            contenutags = {'id': ts.tag.id, 'label':ts.tag.label, 'order':ts.order, 'match': match_tag }
+            
+#            if ts.tag.category is not None:
+#                contenutags['category'] = ts.tag.category.label
+            
+            contenus[ts.datasheet.id]['tags'].append(contenutags)
+                
             tagscore = 2*max_tag_order - ts.order
-            if ts.tag.id not in tags:
-                tags[ts.tag.id] = {'id':ts.tag.id,'label':ts.tag.label, 'score':0}
-            tags[ts.tag.id]['score'] += tagscore
+            if ts.tag.category is not None and ts.tag.category.label not in [u'Discipline artistique', u'Datation', u'Localisation']:
+                if ts.tag.id not in tags:
+                    tags[ts.tag.id] = {'id':ts.tag.id,'label':ts.tag.label, 'score':0}
+                tags[ts.tag.id]['score'] += 1
+                if match_tag:
+                    tags[ts.tag.id]['match'] = True
+                
             if match_tag:
                 contenus[ts.datasheet.id]['score'] += tagscore
-                tags[ts.tag.id]['match'] = True
-            if ts.tag.label in countries:
-                countries[ts.tag.label]['score'] += tagscore
-            if ts.tag.label in disciplines:
-                disciplines[ts.tag.label]['score'] += tagscore
-
-        content_count = content_count if not contentlist else len(contenus)
-        contenus = sorted(contenus.values(),key=lambda e: e.get('score', 0))[0:content_count]
+                
+            if ts.tag.category is not None and ts.tag.category.label == u'Discipline artistique':
+                if ts.tag.label not in disciplines:
+                    disciplines[ts.tag.label] = {'label':ts.tag.label, 'score':0}
+                disciplines[ts.tag.label]['score'] += 1
+                
+            if ts.tag.locatedin is not None:
+                country_id = ts.tag.locatedin.country.id
+                if country_id not in countries:
+                    countries[country_id] = {'id':country_id, 'dbpedia_uri':ts.tag.locatedin.country.dbpedia_uri, 'score': 0}
+                countries[country_id]['score'] += 1
+            
+        if contentlist is None:
+            contenus = sorted(contenus.values(),key=lambda e: -e['score'])[0:content_count]
+            contenus = [contenu for contenu in contenus if contenu['score']]
+            
         countries = countries.values()
-        tags = sorted(tags.values(), key=lambda e: e.get('score', 0))[0:tag_count]
-        disciplines = sorted(disciplines.values(), key=lambda e: e.get('score', 0))[0:10]
+        tags = sorted(tags.values(), key=lambda e: -e['score'])[0:tag_count]
+        disciplines = sorted(disciplines.values(), key=lambda e: -e['score'])[0:10]
         years = {}
+        
         if contentids:
-            qs = TagYears.objects.values('start_year', 'end_year').annotate(order_count=Count('tag__taggedsheet'), order_sum=Sum("tag__taggedsheet__order")).filter(tag__taggedsheet__order__lte=max_tag_order, tag__taggedsheet__datasheet__in = contentids)
+            qs = TagYears.objects.filter(tag__taggedsheet__datasheet__in = contentids).annotate(nb=Count('tag__taggedsheet'))
             for ty in qs:
-                for year in range(ty['start_year'], ty['end_year']):
-                    years[year] = (2*max_tag_order*ty['order_count']-ty['order_sum'])/(ty['end_year']-ty['start_year']) + years[year] if year in years else 0
-#        $rq = pg_query("SELECT U.start_year, U.end_year, SUM(".(2*$max_tag_order)." - V.order)/(U.end_year + 1 - U.start_year) score FROM hdaviz_years U, hdabo_taggedsheet V WHERE U.tag_id = V.tag_id AND V.order <= $max_tag_order AND V.datasheet_id IN ($contentids) GROUP BY U.start_year, U.end_year");
-#        while($ligne = pg_fetch_row($rq)) {
-#            foreach(range($ligne[0], $ligne[1]) as $year) {
-#                $years[$year] = $ligne[2] + ( isset($years[$year]) ? $years[$year] : 0 );
+                for year in range(ty.start_year, ty.end_year):
+                    years[year] = ty.nb + years[year] if year in years else 0
 
     else:
         
         for ds in Datasheet.objects.order_by("?")[:content_count]:
             contenus[ds.id] = {'id':ds.id, 'title':ds.title, 'description':ds.description, 'url':ds.url, 'tags':[]}
+            
         cont_count = Datasheet.objects.count()
         
         qs = TaggedSheet.objects.select_related('tag','datasheet').filter(datasheet__id__in = contenus.keys(), order__lte = max_tag_order).order_by("order").only('order','tag__label','tag__id','datasheet__id')
         for ts in qs:
             contenus[ts.datasheet.id]['tags'].append({'id': ts.tag.id, 'label':ts.tag.label, 'order':ts.order})
         contenus = contenus.values()
+        
+        qs = Tag.objects.exclude(category__label = u"Localisation").exclude(category__label = u"Datation").exclude(category__label = u"Discipline artistice").annotate(nb=Count('datasheet')).order_by('-nb')[:tag_count]
+        tags = [{'id': tag.id, 'label': tag.label, 'score': tag.nb} for tag in qs]
+            
+        qs = Country.objects.annotate(nb=Count('includes__tag__taggedsheet')).order_by('-nb')
+        countries = [{'id': country.id, 'dbpedia_uri': country.dbpedia_uri, 'score': country.nb} for country in qs]
 
-        cursor = connection.cursor() #@UndefinedVariable
-        try:
-            cursor.execute("select t.id, t.label, sum(2*%s-ts.order) as score from hdabo_tag as t join hdabo_taggedsheet as ts on t.id = ts.tag_id where ts.order < %s group by t.id, t.label order by score limit %s",[max_tag_order, max_tag_order, tag_count])
-            tags = [{'id': t[0], 'label':t[1], 'score':t[2]} for t in cursor.fetchall()]
-            #tags = sorted([{'id':tag.id,'label':tag.label,'score':2*max_tag_order*tag.count_score - tag.sum_score} for tag in qs], key=lambda t:t['score'])[:tag_count]                    
-        finally:
-            cursor.close()
-        cursor = connection.cursor() #@UndefinedVariable
-        try:
-            cursor.execute("select c.isocode as isocode, sum(2*%s-ts.order) as score from hdabo_tag as t join hdabo_taggedsheet as ts on t.id = ts.tag_id join hdalab_countrycode as c on t.label = c.label group by c.isocode", [max_tag_order])
-            countries = [{'isocode': r[0], 'score':r[1]} for r in cursor.fetchall()]
-            
-        finally:
-            cursor.close()
-
-        qs = Tag.objects.annotate(count_score=Count('taggedsheet'),sum_score=Sum('taggedsheet__order')).filter(taggedsheet__order__lte = max_tag_order, category__label__iexact = u"Discipline")
-        disciplines = sorted([{'label':tag.label,'score':2*max_tag_order*tag.count_score - tag.sum_score} for tag in qs], key=lambda t:t.score, reverse=True)[:10]
+        qs = Tag.objects.annotate(nb=Count('taggedsheet')).filter(category = 5).order_by('-nb')[:10]
+        disciplines = [{'label':tag.label,'score':tag.nb} for tag in qs]
 
         years = {}
-        qs = TagYears.objects.values('start_year', 'end_year').annotate(order_count=Count('tag__taggedsheet'), order_sum=Sum("tag__taggedsheet__order")).filter(tag__taggedsheet__order__lte=max_tag_order)
+        qs = TagYears.objects.annotate(nb=Count('tag__taggedsheet'))
         for ty in qs:
-            for year in range(ty['start_year'], ty['end_year']):
-                years[year] = (2*max_tag_order*ty['order_count']-ty['order_sum'])/(ty['end_year']-ty['start_year']) + years[year] if year in years else 0
+            for year in range(ty.start_year, ty.end_year):
+                years[year] = ty.nb + years[year] if year in years else 0
 
     yearchange = []
     for year in sorted(years.keys()):
         score = years[year]
         if year < 2011:
-            if year-1 not in years or years[year-1] != score:
+            if (year-1 not in years and score != 0) or (year-1 in years and years[year-1] != score):
                 yearchange.append({'year': year, 'score': score})
-            if year+1 not in years and year != -1:
+            if year+1 not in years and year != -1 and score != 0:
                 yearchange.append({'year': year+1, 'score': 0})
 
     output = {'count': cont_count, 'contents': contenus, 'tags':tags, 'sparkline':yearchange, 'countries':countries, 'disciplines':disciplines}