--- a/.hgignore Fri Feb 17 13:07:49 2012 +0100
+++ b/.hgignore Wed Feb 22 18:55:35 2012 +0100
@@ -20,4 +20,6 @@
syntax: regexp
\.sh$
syntax: regexp
-^web/hdalab/config\.py$
\ No newline at end of file
+^web/hdalab/config\.py$
+syntax: regexp
+^web/hdalab/\.htaccess$
\ No newline at end of file
--- a/.settings/org.eclipse.core.resources.prefs Fri Feb 17 13:07:49 2012 +0100
+++ b/.settings/org.eclipse.core.resources.prefs Wed Feb 22 18:55:35 2012 +0100
@@ -1,40 +1,43 @@
-#Wed Feb 15 16:29:39 CET 2012
-eclipse.preferences.version=1
-encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/haystack/backends/__init__.py=utf-8
-encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/fields.py=utf-8
-encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/forms.py=utf-8
-encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/tests.py=utf-8
-encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/whoosh/analysis.py=utf8
-encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/api.py=utf-8
-encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/wiki.py=utf-8
-encoding//web/hdabo/forms.py=utf-8
-encoding//web/hdabo/management/commands/import_csv.py=utf-8
-encoding//web/hdabo/management/commands/import_tag_popularity.py=utf-8
-encoding//web/hdabo/management/commands/query_wikipedia.py=utf-8
-encoding//web/hdabo/migrations/0001_initial.py=utf-8
-encoding//web/hdabo/migrations/0002_backport_hdabo_sf.py=utf-8
-encoding//web/hdabo/migrations/0003_update_redirection.py=utf-8
-encoding//web/hdabo/models.py=utf-8
-encoding//web/hdabo/search/french_whoosh_backend.py=utf-8
-encoding//web/hdabo/tests/models.py=utf-8
-encoding//web/hdabo/utils.py=utf-8
-encoding//web/hdabo/views.py=utf-8
-encoding//web/hdabo/wp_utils.py=utf-8
-encoding//web/hdalab/__init__.py=utf-8
-encoding//web/hdalab/config.py=utf-8
-encoding//web/hdalab/manage.py=utf-8
-encoding//web/hdalab/management/commands/export_tags_csv.py=utf-8
-encoding//web/hdalab/management/commands/export_wpcategory_csv.py=utf-8
-encoding//web/hdalab/management/commands/fill_country_codes.py=utf-8
-encoding//web/hdalab/management/commands/fill_tag_years.py=utf-8
-encoding//web/hdalab/management/commands/query_dbpedia.py=utf-8
-encoding//web/hdalab/management/commands/query_wikipedia_category.py=utf-8
-encoding//web/hdalab/management/utils.py=utf-8
-encoding//web/hdalab/migrations/0001_initial.py=utf-8
-encoding//web/hdalab/migrations/0002_dataviz.py=utf-8
-encoding//web/hdalab/migrations/0003_hdasession.py=utf-8
-encoding//web/hdalab/migrations/0005_rename_end_year.py=utf-8
-encoding//web/hdalab/models/dataviz.py=utf-8
-encoding//web/hdalab/settings.py=utf-8
-encoding//web/hdalab/urls.py=utf-8
-encoding//web/hdalab/views/ajax.py=utf-8
+#Wed Feb 22 17:20:19 CET 2012
+eclipse.preferences.version=1
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/haystack/backends/__init__.py=utf-8
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/fields.py=utf-8
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/forms.py=utf-8
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/tests.py=utf-8
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/whoosh/analysis.py=utf8
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/api.py=utf-8
+encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/wiki.py=utf-8
+encoding//web/hdabo/forms.py=utf-8
+encoding//web/hdabo/management/commands/import_csv.py=utf-8
+encoding//web/hdabo/management/commands/import_tag_popularity.py=utf-8
+encoding//web/hdabo/management/commands/query_wikipedia.py=utf-8
+encoding//web/hdabo/migrations/0001_initial.py=utf-8
+encoding//web/hdabo/migrations/0002_backport_hdabo_sf.py=utf-8
+encoding//web/hdabo/migrations/0003_update_redirection.py=utf-8
+encoding//web/hdabo/models.py=utf-8
+encoding//web/hdabo/search/french_whoosh_backend.py=utf-8
+encoding//web/hdabo/tests/models.py=utf-8
+encoding//web/hdabo/utils.py=utf-8
+encoding//web/hdabo/views.py=utf-8
+encoding//web/hdabo/wp_utils.py=utf-8
+encoding//web/hdalab/__init__.py=utf-8
+encoding//web/hdalab/config.py=utf-8
+encoding//web/hdalab/manage.py=utf-8
+encoding//web/hdalab/management/commands/export_tags_csv.py=utf-8
+encoding//web/hdalab/management/commands/export_wpcategory_csv.py=utf-8
+encoding//web/hdalab/management/commands/fill_country_codes.py=utf-8
+encoding//web/hdalab/management/commands/fill_tag_years.py=utf-8
+encoding//web/hdalab/management/commands/query_dbpedia.py=utf-8
+encoding//web/hdalab/management/commands/query_geo_inclusion.py=utf-8
+encoding//web/hdalab/management/commands/query_wikipedia_category.py=utf-8
+encoding//web/hdalab/management/utils.py=utf-8
+encoding//web/hdalab/migrations/0001_initial.py=utf-8
+encoding//web/hdalab/migrations/0002_dataviz.py=utf-8
+encoding//web/hdalab/migrations/0003_hdasession.py=utf-8
+encoding//web/hdalab/migrations/0005_rename_end_year.py=utf-8
+encoding//web/hdalab/migrations/0006_dbpedia_fields_one_to_one.py=utf-8
+encoding//web/hdalab/migrations/0007_geographic_inclusion.py=utf-8
+encoding//web/hdalab/models/dataviz.py=utf-8
+encoding//web/hdalab/settings.py=utf-8
+encoding//web/hdalab/urls.py=utf-8
+encoding//web/hdalab/views/ajax.py=utf-8
--- a/virtualenv/res/lib/lib_create_env.py Fri Feb 17 13:07:49 2012 +0100
+++ b/virtualenv/res/lib/lib_create_env.py Wed Feb 22 18:55:35 2012 +0100
@@ -23,6 +23,7 @@
'WHOOSH' : { 'setup': 'Whoosh', 'url': 'https://bitbucket.org/mchaput/whoosh/get/tip.tar.bz2', 'local': 'whoosh-1.8.3.tar.bz2'},
'WIKITOOLS' : { 'setup': 'wikitools', 'url': 'wikitools.tar.bz2', 'local': 'wikitools.tar.bz2'},
'RDFLIB' : { 'setup': 'rdflib', 'url': 'http://rdflib.googlecode.com/files/rdflib-3.2.0.tar.gz', 'local': 'rdflib-3.2.0.tar.gz'},
+ 'SPARQLWRAPPER' : { 'setup': 'SPARQLWrapper', 'url': 'http://downloads.sourceforge.net/project/sparql-wrapper/sparql-wrapper-python/1.5.0/SPARQLWrapper-1.5.0.tar.gz', 'local' : 'SPARQLWrapper-1.5.0.tar.gz'},
}
if system_str == 'Windows':
Binary file virtualenv/res/src/SPARQLWrapper-1.5.0.tar.gz has changed
--- a/virtualenv/web/res/res_create_env.py Fri Feb 17 13:07:49 2012 +0100
+++ b/virtualenv/web/res/res_create_env.py Wed Feb 22 18:55:35 2012 +0100
@@ -35,6 +35,7 @@
('WHOOSH', 'pip', None, None),
('HAYSTACK', 'pip', None, None),
('WIKITOOLS', 'pip', None, None),
+ ('SPARQLWRAPPER', 'pip', None, None),
])
if system_str == "Darwin":
--- a/web/hdalab/.htaccess Fri Feb 17 13:07:49 2012 +0100
+++ b/web/hdalab/.htaccess Wed Feb 22 18:55:35 2012 +0100
@@ -1,7 +1,7 @@
SetEnv DJANGO_SETTINGS_MODULE hdalab.settings
-SetEnv PROJECT_PATH /Users/ymh/dev/workspace/hdabo/web
-SetEnv PYTHON_PATH /Users/ymh/dev/workspace/hdabo/virtualenv/web/env/hdabo/lib/python2.6/site-packages:/Applications/Dev/eclipse/plugins/org.python.pydev.debug_2.2.0.2011062419/pysrc/
+SetEnv PROJECT_PATH /home/raph/shared/hdabo/web
+SetEnv PYTHON_PATH /home/raph/venv_hdabo/env/lib/python2.7/site-packages
SetEnv PYDEV_DEBUG False
Options ExecCGI FollowSymLinks
@@ -13,7 +13,7 @@
RewriteEngine On
RewriteCond %{REQUEST_FILENAME} !-f
-RewriteRule ^(.*)$ /~ymh/hdabo/hdalab/modwsgi.wsgi/$1 [QSA,PT,L]
+RewriteRule ^(.*)$ /hda/hdalab/modwsgi.wsgi/$1 [QSA,PT,L]
Header set Pragma "no-cache"
Header set Cache-Control "no-cache"
--- a/web/hdalab/management/commands/fill_tag_years.py Fri Feb 17 13:07:49 2012 +0100
+++ b/web/hdalab/management/commands/fill_tag_years.py Wed Feb 22 18:55:35 2012 +0100
@@ -31,7 +31,7 @@
start = int(range[0][1])
end = start
if range[0][0]:
- end = start + 9
+ end = start + 10
if range[0][2]:
end = int(range[0][2])
else:
@@ -46,7 +46,7 @@
start = -100*val
else:
start = 1 + 100*(val-1)
- end = start + 99
+ end = start + 100
if start != None and end != None:
ty = TagYears.objects.create(tag=tag, start_year=start, end_year=end)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/management/commands/query_geo_inclusion.py Wed Feb 22 18:55:35 2012 +0100
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Feb 22, 2012
+
+@author: raphv
+'''
+
+from django.core.management.base import NoArgsCommand
+from django.core.management.color import no_style
+from hdabo.utils import show_progress
+from hdabo.models import Tag
+from hdalab.models import Country, GeoInclusion
+from SPARQLWrapper import SPARQLWrapper, JSON
+import re
+
+class Command(NoArgsCommand):
+    '''Link each "Localisation" tag that has a DBpedia URI to its country.'''
+
+    def handle_noargs(self, **options):
+        '''Query DBpedia once per tag and store the result as a GeoInclusion.
+
+        Only tags whose query yields exactly one binding are linked: either
+        another DBpedia resource (the country), or rdf:type, which means the
+        tag itself is a country. Progress is reported via show_progress.
+        '''
+        self.style = no_style()
+
+        qs = Tag.objects.filter(category__label="Localisation").exclude(dbpedia_uri = None)
+        total = qs.count()
+
+        endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
+        endpoint.setReturnFormat(JSON)
+        sparqltext = """
+        SELECT ?resource WHERE {
+        { <%s> ?resource <http://dbpedia.org/ontology/Country> . }
+        UNION
+        { <%s> <http://dbpedia.org/ontology/country> ?resource . }
+        }
+        """
+        resourceprefix = "http://dbpedia.org/resource/"
+        identityuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
+        writer = None
+
+        for i,tag in enumerate(qs):
+            # Reset per tag: otherwise the label is unbound (first tag) or stale.
+            countryuri, countrytxt = None, '<unknown>'
+            endpoint.setQuery(sparqltext % (tag.dbpedia_uri, tag.dbpedia_uri))
+            results = endpoint.query().convert()['results']['bindings']
+
+            if len(results) == 1: # We don't want places located in multiple countries
+                resourceuri = results[0]['resource']['value']
+                if resourceuri == identityuri:
+                    # The binding is the rdf:type predicate: the tag is a country itself.
+                    countryuri, countrytxt = tag.dbpedia_uri, '<is a country>'
+                elif resourceuri.startswith(resourceprefix):
+                    countryuri, countrytxt = resourceuri, resourceuri.rsplit('/', 1)[-1]
+                if countryuri is not None:
+                    country, created = Country.objects.get_or_create(dbpedia_uri=countryuri)
+                    GeoInclusion.objects.get_or_create(tag=tag, country=country)
+
+            writer = show_progress(i+1, total, '%s => %s'%(tag.label, countrytxt), 50, writer)
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/migrations/0007_geographic_inclusion.py Wed Feb 22 18:55:35 2012 +0100
@@ -0,0 +1,139 @@
+# encoding: utf-8
+import datetime
+from south.db import db
+from south.v2 import SchemaMigration
+from django.db import models
+
+class Migration(SchemaMigration):
+
+ def forwards(self, orm):
+ # Drops the CountryCode table (its rows are not migrated) and creates the Country and GeoInclusion tables.
+ # Deleting model 'CountryCode'
+ db.delete_table('hdalab_countrycode')
+
+ # Adding model 'GeoInclusion'
+ db.create_table('hdalab_geoinclusion', (
+ ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+ ('tag', self.gf('django.db.models.fields.related.OneToOneField')(related_name='locatedin', unique=True, to=orm['hdabo.Tag'])),
+ ('country', self.gf('django.db.models.fields.related.ForeignKey')(related_name='includes', to=orm['hdalab.Country'])),
+ ))
+ db.send_create_signal('hdalab', ['GeoInclusion'])
+
+ # Adding model 'Country'
+ db.create_table('hdalab_country', (
+ ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+ ('dbpedia_uri', self.gf('django.db.models.fields.URLField')(unique=True, max_length=255, db_index=True)),
+ ))
+ db.send_create_signal('hdalab', ['Country'])
+
+
+ def backwards(self, orm):
+ # Recreates an empty CountryCode table and drops GeoInclusion and Country (their rows are lost).
+ # Adding model 'CountryCode'
+ db.create_table('hdalab_countrycode', (
+ ('isocode', self.gf('django.db.models.fields.CharField')(max_length=3, unique=True, db_index=True)),
+ ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+ ('label', self.gf('django.db.models.fields.CharField')(max_length=1024, unique=True, db_index=True)),
+ ))
+ db.send_create_signal('hdalab', ['CountryCode'])
+
+ # Deleting model 'GeoInclusion'
+ db.delete_table('hdalab_geoinclusion')
+
+ # Deleting model 'Country'
+ db.delete_table('hdalab_country')
+
+
+ models = {
+ 'hdabo.tag': {
+ 'Meta': {'unique_together': "(('label', 'original_label', 'url_status'),)", 'object_name': 'Tag'},
+ 'alias': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+ 'alternative_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+ 'alternative_wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}),
+ 'alternative_wikipedia_url': ('django.db.models.fields.URLField', [], {'db_index': 'True', 'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+ 'category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagCategory']", 'null': 'True', 'blank': 'True'}),
+ 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
+ 'dbpedia_uri': ('django.db.models.fields.URLField', [], {'db_index': 'True', 'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+ 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+ 'label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
+ 'normalized_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
+ 'original_label': ('django.db.models.fields.CharField', [], {'max_length': '1024'}),
+ 'popularity': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
+ 'url_status': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'db_index': 'True', 'blank': 'True'}),
+ 'wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}),
+ 'wikipedia_url': ('django.db.models.fields.URLField', [], {'db_index': 'True', 'max_length': '2048', 'null': 'True', 'blank': 'True'})
+ },
+ 'hdabo.tagcategory': {
+ 'Meta': {'object_name': 'TagCategory'},
+ 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+ 'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
+ },
+ 'hdalab.country': {
+ 'Meta': {'object_name': 'Country'},
+ 'dbpedia_uri': ('django.db.models.fields.URLField', [], {'unique': 'True', 'max_length': '255', 'db_index': 'True'}),
+ 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
+ },
+ 'hdalab.dbpediafields': {
+ 'Meta': {'object_name': 'DbpediaFields'},
+ 'abstract': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+ 'dbpedia_uri': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'db_index': 'True'}),
+ 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+ 'label': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+ 'tag': ('django.db.models.fields.related.OneToOneField', [], {'blank': 'True', 'related_name': "'dbpedia_fields'", 'unique': 'True', 'null': 'True', 'to': "orm['hdabo.Tag']"}),
+ 'thumbnail': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'})
+ },
+ 'hdalab.geoinclusion': {
+ 'Meta': {'object_name': 'GeoInclusion'},
+ 'country': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'includes'", 'to': "orm['hdalab.Country']"}),
+ 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+ 'tag': ('django.db.models.fields.related.OneToOneField', [], {'related_name': "'locatedin'", 'unique': 'True', 'to': "orm['hdabo.Tag']"})
+ },
+ 'hdalab.hdasession': {
+ 'Meta': {'object_name': 'HdaSession'},
+ 'data': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+ 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+ 'sessionid': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '36', 'db_index': 'True'})
+ },
+ 'hdalab.infoboxparameter': {
+ 'Meta': {'unique_together': "(('tag_infobox', 'param_name'),)", 'object_name': 'InfoboxParameter'},
+ 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+ 'param_name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+ 'param_value': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+ 'tag_infobox': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdalab.TagInfobox']"})
+ },
+ 'hdalab.taginfobox': {
+ 'Meta': {'unique_together': "(('tag', 'name', 'revision_id'),)", 'object_name': 'TagInfobox'},
+ 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+ 'name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+ 'revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
+ 'source': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+ 'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'infoboxes'", 'to': "orm['hdabo.Tag']"})
+ },
+ 'hdalab.taglinks': {
+ 'Meta': {'object_name': 'TagLinks'},
+ 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+ 'object': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'taglinks_objects'", 'to': "orm['hdabo.Tag']"}),
+ 'subject': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'taglinks_subjects'", 'to': "orm['hdabo.Tag']"})
+ },
+ 'hdalab.tagwpcategory': {
+ 'Meta': {'unique_together': "(('tag', 'wp_category', 'hidden'),)", 'object_name': 'TagWpCategory'},
+ 'hidden': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+ 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+ 'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'wp_categories'", 'to': "orm['hdabo.Tag']"}),
+ 'wp_category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdalab.WpCategory']"})
+ },
+ 'hdalab.tagyears': {
+ 'Meta': {'object_name': 'TagYears'},
+ 'end_year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}),
+ 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+ 'start_year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}),
+ 'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'years'", 'to': "orm['hdabo.Tag']"})
+ },
+ 'hdalab.wpcategory': {
+ 'Meta': {'object_name': 'WpCategory'},
+ 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+ 'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '2048'})
+ }
+ }
+
+ complete_apps = ['hdalab']
--- a/web/hdalab/models/__init__.py Fri Feb 17 13:07:49 2012 +0100
+++ b/web/hdalab/models/__init__.py Wed Feb 22 18:55:35 2012 +0100
@@ -1,5 +1,5 @@
from hdalab.models.categories import (WpCategory, InfoboxParameter, TagInfobox,
TagWpCategory)
-from hdalab.models.dataviz import TagYears, CountryCode, TagLinks, DbpediaFields, HdaSession
+from hdalab.models.dataviz import TagYears, Country, GeoInclusion, TagLinks, DbpediaFields, HdaSession
-__all__ = ['WpCategory', 'InfoboxParameter', 'TagInfobox', 'TagWpCategory', 'TagYears', 'CountryCode', 'TagLinks', 'DbpediaFields', 'HdaSession']
\ No newline at end of file
+__all__ = ['WpCategory', 'InfoboxParameter', 'TagInfobox', 'TagWpCategory', 'TagYears', 'Country', 'GeoInclusion', 'TagLinks', 'DbpediaFields', 'HdaSession']
\ No newline at end of file
--- a/web/hdalab/models/dataviz.py Fri Feb 17 13:07:49 2012 +0100
+++ b/web/hdalab/models/dataviz.py Wed Feb 22 18:55:35 2012 +0100
@@ -26,10 +26,25 @@
class Meta:
app_label = 'hdalab'
-class CountryCode(models.Model):
+#class CountryCode(models.Model):
+#
+# label = models.CharField(max_length=1024, unique=True, blank=False, null=False, db_index=True)
+# isocode = models.CharField(max_length=3, unique=True, blank=False, null=False, db_index=True)
+#
+# class Meta:
+# app_label = 'hdalab'
- label = models.CharField(max_length=1024, unique=True, blank=False, null=False, db_index=True)
- isocode = models.CharField(max_length=3, unique=True, blank=False, null=False, db_index=True)
+class Country(models.Model):
+ # A country, stored only as the (unique) URI of its DBpedia resource.
+ dbpedia_uri = models.URLField(verify_exists=False, max_length=255, blank=False, null=False, db_index=True, unique=True)
+
+ class Meta:
+ app_label = 'hdalab'
+
+class GeoInclusion(models.Model):
+ # Places a tag in a country: at most one row per tag (OneToOne, tag.locatedin); a country lists its rows via country.includes.
+ tag = models.OneToOneField(Tag, related_name="locatedin", db_index=True)
+ country = models.ForeignKey(Country, blank=False, null=False, related_name="includes", db_index=True)
class Meta:
app_label = 'hdalab'
--- a/web/hdalab/views/ajax.py Fri Feb 17 13:07:49 2012 +0100
+++ b/web/hdalab/views/ajax.py Wed Feb 22 18:55:35 2012 +0100
@@ -9,7 +9,7 @@
from django.db.models import Q, Count, Sum
from django.http import HttpResponse
from hdabo.models import Tag, Datasheet, TaggedSheet
-from hdalab.models import TagLinks, HdaSession, CountryCode, TagYears
+from hdalab.models import TagLinks, HdaSession, Country, GeoInclusion, TagYears
import django.utils.simplejson as json
import hmac
import uuid
@@ -101,13 +101,10 @@
if label or periode or contentlist :
-
+
qs = Datasheet.objects.filter(validated=True)
-
- #$globalsql = "SELECT id, title, description, url FROM hdabo_datasheet E";
globalids = []
- #$globalfilters = array();
if periode:
years = periode.split(",")
@@ -124,7 +121,7 @@
globalids += [t.id for t in tagqs]
- qs = qs.filter(taggedsheet__tag__in = tagqs)
+ qs = qs.filter(taggedsheet__tag__in = tagqs)
if label:
for txtlbl in label.split(","):
@@ -138,95 +135,105 @@
if contentlist:
qs = qs.filter(id__in = contentlist.split(","))
- if contentlist is None:
- qs = qs.order_by('?')
+# if contentlist is None:
+# qs = qs.order_by('?')
+ qs = qs.distinct()
- for content in qs:
- cont_count += 1
- contenus[content.id] = {'score' : 0, 'tags' : [], 'id':content.id, 'title': content.title, 'description': content.description, 'url': content.url}
+ contenus = dict([(content.id, {'score' : 0, 'tags' : [], 'id':content.id, 'title': content.title, 'description': content.description, 'url': content.url}) for content in qs])
-
- countries = dict([ (cc.label, {'isocode':cc.isocode, 'score':0}) for cc in CountryCode.objects.all() ])
- disciplines = dict([ (d.label, {'label':d.label, 'score':0}) for d in Tag.objects.filter(category__label = "Discipline artistique")])
+ #countries = dict([ (cc.label, {'isocode':cc.isocode, 'score':0}) for cc in CountryCode.objects.all() ])
+ #disciplines = dict([ (d.label, {'label':d.label, 'score':0}) for d in Tag.objects.filter(category__label = "Discipline artistique")])
+
+ countries = {}
+ disciplines = {}
tags = {}
contentids = contenus.keys()
- qs = TaggedSheet.objects.select_related('tag').filter(datasheet__in = contentids, order__lte = max_tag_order).order_by('order')
+ cont_count = len(contentids)
+
+ qs = TaggedSheet.objects.select_related('tag','tag__category','tag__locatedin__country').filter(datasheet__in = contentids, order__lte = max_tag_order).order_by('order')
for ts in qs:
match_tag = ts.tag.id in globalids
- contenus[ts.datasheet.id]['tags'].append({'id': ts.tag.id, 'label':ts.tag.label, 'order':ts.order, 'match': match_tag})
+ contenutags = {'id': ts.tag.id, 'label':ts.tag.label, 'order':ts.order, 'match': match_tag }
+
+# if ts.tag.category is not None:
+# contenutags['category'] = ts.tag.category.label
+
+ contenus[ts.datasheet.id]['tags'].append(contenutags)
+
tagscore = 2*max_tag_order - ts.order
- if ts.tag.id not in tags:
- tags[ts.tag.id] = {'id':ts.tag.id,'label':ts.tag.label, 'score':0}
- tags[ts.tag.id]['score'] += tagscore
+ if ts.tag.category is not None and ts.tag.category.label not in [u'Discipline artistique', u'Datation', u'Localisation']:
+ if ts.tag.id not in tags:
+ tags[ts.tag.id] = {'id':ts.tag.id,'label':ts.tag.label, 'score':0}
+ tags[ts.tag.id]['score'] += 1
+ if match_tag:
+ tags[ts.tag.id]['match'] = True
+
if match_tag:
contenus[ts.datasheet.id]['score'] += tagscore
- tags[ts.tag.id]['match'] = True
- if ts.tag.label in countries:
- countries[ts.tag.label]['score'] += tagscore
- if ts.tag.label in disciplines:
- disciplines[ts.tag.label]['score'] += tagscore
-
- content_count = content_count if not contentlist else len(contenus)
- contenus = sorted(contenus.values(),key=lambda e: e.get('score', 0))[0:content_count]
+
+ if ts.tag.category is not None and ts.tag.category.label == u'Discipline artistique':
+ if ts.tag.label not in disciplines:
+ disciplines[ts.tag.label] = {'label':ts.tag.label, 'score':0}
+ disciplines[ts.tag.label]['score'] += 1
+
+ if ts.tag.locatedin is not None:
+ country_id = ts.tag.locatedin.country.id
+ if country_id not in countries:
+ countries[country_id] = {'id':country_id, 'dbpedia_uri':ts.tag.locatedin.country.dbpedia_uri, 'score': 0}
+ countries[country_id]['score'] += 1
+
+ if contentlist is None:
+ contenus = sorted(contenus.values(),key=lambda e: -e['score'])[0:content_count]
+ contenus = [contenu for contenu in contenus if contenu['score']]
+
countries = countries.values()
- tags = sorted(tags.values(), key=lambda e: e.get('score', 0))[0:tag_count]
- disciplines = sorted(disciplines.values(), key=lambda e: e.get('score', 0))[0:10]
+ tags = sorted(tags.values(), key=lambda e: -e['score'])[0:tag_count]
+ disciplines = sorted(disciplines.values(), key=lambda e: -e['score'])[0:10]
years = {}
+
if contentids:
- qs = TagYears.objects.values('start_year', 'end_year').annotate(order_count=Count('tag__taggedsheet'), order_sum=Sum("tag__taggedsheet__order")).filter(tag__taggedsheet__order__lte=max_tag_order, tag__taggedsheet__datasheet__in = contentids)
+ qs = TagYears.objects.filter(tag__taggedsheet__datasheet__in = contentids).annotate(nb=Count('tag__taggedsheet'))
for ty in qs:
- for year in range(ty['start_year'], ty['end_year']):
- years[year] = (2*max_tag_order*ty['order_count']-ty['order_sum'])/(ty['end_year']-ty['start_year']) + years[year] if year in years else 0
-# $rq = pg_query("SELECT U.start_year, U.end_year, SUM(".(2*$max_tag_order)." - V.order)/(U.end_year + 1 - U.start_year) score FROM hdaviz_years U, hdabo_taggedsheet V WHERE U.tag_id = V.tag_id AND V.order <= $max_tag_order AND V.datasheet_id IN ($contentids) GROUP BY U.start_year, U.end_year");
-# while($ligne = pg_fetch_row($rq)) {
-# foreach(range($ligne[0], $ligne[1]) as $year) {
-# $years[$year] = $ligne[2] + ( isset($years[$year]) ? $years[$year] : 0 );
+ for year in range(ty.start_year, ty.end_year):
+ years[year] = ty.nb + years.get(year, 0)  # add to the running total; a year seen for the first time starts from 0
else:
for ds in Datasheet.objects.order_by("?")[:content_count]:
contenus[ds.id] = {'id':ds.id, 'title':ds.title, 'description':ds.description, 'url':ds.url, 'tags':[]}
+
cont_count = Datasheet.objects.count()
qs = TaggedSheet.objects.select_related('tag','datasheet').filter(datasheet__id__in = contenus.keys(), order__lte = max_tag_order).order_by("order").only('order','tag__label','tag__id','datasheet__id')
for ts in qs:
contenus[ts.datasheet.id]['tags'].append({'id': ts.tag.id, 'label':ts.tag.label, 'order':ts.order})
contenus = contenus.values()
+
+ qs = Tag.objects.exclude(category__label = u"Localisation").exclude(category__label = u"Datation").exclude(category__label = u"Discipline artistique").annotate(nb=Count('datasheet')).order_by('-nb')[:tag_count]
+ tags = [{'id': tag.id, 'label': tag.label, 'score': tag.nb} for tag in qs]
+
+ qs = Country.objects.annotate(nb=Count('includes__tag__taggedsheet')).order_by('-nb')
+ countries = [{'id': country.id, 'dbpedia_uri': country.dbpedia_uri, 'score': country.nb} for country in qs]
- cursor = connection.cursor() #@UndefinedVariable
- try:
- cursor.execute("select t.id, t.label, sum(2*%s-ts.order) as score from hdabo_tag as t join hdabo_taggedsheet as ts on t.id = ts.tag_id where ts.order < %s group by t.id, t.label order by score limit %s",[max_tag_order, max_tag_order, tag_count])
- tags = [{'id': t[0], 'label':t[1], 'score':t[2]} for t in cursor.fetchall()]
- #tags = sorted([{'id':tag.id,'label':tag.label,'score':2*max_tag_order*tag.count_score - tag.sum_score} for tag in qs], key=lambda t:t['score'])[:tag_count]
- finally:
- cursor.close()
- cursor = connection.cursor() #@UndefinedVariable
- try:
- cursor.execute("select c.isocode as isocode, sum(2*%s-ts.order) as score from hdabo_tag as t join hdabo_taggedsheet as ts on t.id = ts.tag_id join hdalab_countrycode as c on t.label = c.label group by c.isocode", [max_tag_order])
- countries = [{'isocode': r[0], 'score':r[1]} for r in cursor.fetchall()]
-
- finally:
- cursor.close()
-
- qs = Tag.objects.annotate(count_score=Count('taggedsheet'),sum_score=Sum('taggedsheet__order')).filter(taggedsheet__order__lte = max_tag_order, category__label__iexact = u"Discipline")
- disciplines = sorted([{'label':tag.label,'score':2*max_tag_order*tag.count_score - tag.sum_score} for tag in qs], key=lambda t:t.score, reverse=True)[:10]
+ qs = Tag.objects.annotate(nb=Count('taggedsheet')).filter(category__label = u"Discipline artistique").order_by('-nb')[:10]  # select by label, not by a database-specific primary key
+ disciplines = [{'label':tag.label,'score':tag.nb} for tag in qs]
years = {}
- qs = TagYears.objects.values('start_year', 'end_year').annotate(order_count=Count('tag__taggedsheet'), order_sum=Sum("tag__taggedsheet__order")).filter(tag__taggedsheet__order__lte=max_tag_order)
+ qs = TagYears.objects.annotate(nb=Count('tag__taggedsheet'))
for ty in qs:
- for year in range(ty['start_year'], ty['end_year']):
- years[year] = (2*max_tag_order*ty['order_count']-ty['order_sum'])/(ty['end_year']-ty['start_year']) + years[year] if year in years else 0
+ for year in range(ty.start_year, ty.end_year):
+ years[year] = ty.nb + years.get(year, 0)  # add to the running total; a year seen for the first time starts from 0
yearchange = []
for year in sorted(years.keys()):
score = years[year]
if year < 2011:
- if year-1 not in years or years[year-1] != score:
+ if (year-1 not in years and score != 0) or (year-1 in years and years[year-1] != score):
yearchange.append({'year': year, 'score': score})
- if year+1 not in years and year != -1:
+ if year+1 not in years and year != -1 and score != 0:
yearchange.append({'year': year+1, 'score': 0})
output = {'count': cont_count, 'contents': contenus, 'tags':tags, 'sparkline':yearchange, 'countries':countries, 'disciplines':disciplines}