# HG changeset patch # User veltr # Date 1329933335 -3600 # Node ID fde8335a037cb42702ee4d76f9833c52acf1ce47 # Parent f316ba5856fe025795f7ff19913140f99ee8bbc3 Added Geographic Inclusion in the Django version (server only) diff -r f316ba5856fe -r fde8335a037c .hgignore --- a/.hgignore Fri Feb 17 13:07:49 2012 +0100 +++ b/.hgignore Wed Feb 22 18:55:35 2012 +0100 @@ -20,4 +20,8 @@ syntax: regexp \.sh$ syntax: regexp -^web/hdalab/config\.py$ \ No newline at end of file +^web/hdalab/config\.py$ +syntax: regexp +^web/hdalab/\.htaccess$ +syntax: regexp +^web/hdalab/\.htaccess$ \ No newline at end of file diff -r f316ba5856fe -r fde8335a037c .settings/org.eclipse.core.resources.prefs --- a/.settings/org.eclipse.core.resources.prefs Fri Feb 17 13:07:49 2012 +0100 +++ b/.settings/org.eclipse.core.resources.prefs Wed Feb 22 18:55:35 2012 +0100 @@ -1,40 +1,43 @@ -#Wed Feb 15 16:29:39 CET 2012 -eclipse.preferences.version=1 -encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/haystack/backends/__init__.py=utf-8 -encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/fields.py=utf-8 -encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/forms.py=utf-8 -encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/tests.py=utf-8 -encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/whoosh/analysis.py=utf8 -encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/api.py=utf-8 -encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/wiki.py=utf-8 -encoding//web/hdabo/forms.py=utf-8 -encoding//web/hdabo/management/commands/import_csv.py=utf-8 -encoding//web/hdabo/management/commands/import_tag_popularity.py=utf-8 -encoding//web/hdabo/management/commands/query_wikipedia.py=utf-8 -encoding//web/hdabo/migrations/0001_initial.py=utf-8 -encoding//web/hdabo/migrations/0002_backport_hdabo_sf.py=utf-8 -encoding//web/hdabo/migrations/0003_update_redirection.py=utf-8 
-encoding//web/hdabo/models.py=utf-8 -encoding//web/hdabo/search/french_whoosh_backend.py=utf-8 -encoding//web/hdabo/tests/models.py=utf-8 -encoding//web/hdabo/utils.py=utf-8 -encoding//web/hdabo/views.py=utf-8 -encoding//web/hdabo/wp_utils.py=utf-8 -encoding//web/hdalab/__init__.py=utf-8 -encoding//web/hdalab/config.py=utf-8 -encoding//web/hdalab/manage.py=utf-8 -encoding//web/hdalab/management/commands/export_tags_csv.py=utf-8 -encoding//web/hdalab/management/commands/export_wpcategory_csv.py=utf-8 -encoding//web/hdalab/management/commands/fill_country_codes.py=utf-8 -encoding//web/hdalab/management/commands/fill_tag_years.py=utf-8 -encoding//web/hdalab/management/commands/query_dbpedia.py=utf-8 -encoding//web/hdalab/management/commands/query_wikipedia_category.py=utf-8 -encoding//web/hdalab/management/utils.py=utf-8 -encoding//web/hdalab/migrations/0001_initial.py=utf-8 -encoding//web/hdalab/migrations/0002_dataviz.py=utf-8 -encoding//web/hdalab/migrations/0003_hdasession.py=utf-8 -encoding//web/hdalab/migrations/0005_rename_end_year.py=utf-8 -encoding//web/hdalab/models/dataviz.py=utf-8 -encoding//web/hdalab/settings.py=utf-8 -encoding//web/hdalab/urls.py=utf-8 -encoding//web/hdalab/views/ajax.py=utf-8 +#Wed Feb 22 17:20:19 CET 2012 +eclipse.preferences.version=1 +encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/haystack/backends/__init__.py=utf-8 +encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/fields.py=utf-8 +encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/forms.py=utf-8 +encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/tests.py=utf-8 +encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/whoosh/analysis.py=utf8 +encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/api.py=utf-8 +encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/wiki.py=utf-8 +encoding//web/hdabo/forms.py=utf-8 
+encoding//web/hdabo/management/commands/import_csv.py=utf-8 +encoding//web/hdabo/management/commands/import_tag_popularity.py=utf-8 +encoding//web/hdabo/management/commands/query_wikipedia.py=utf-8 +encoding//web/hdabo/migrations/0001_initial.py=utf-8 +encoding//web/hdabo/migrations/0002_backport_hdabo_sf.py=utf-8 +encoding//web/hdabo/migrations/0003_update_redirection.py=utf-8 +encoding//web/hdabo/models.py=utf-8 +encoding//web/hdabo/search/french_whoosh_backend.py=utf-8 +encoding//web/hdabo/tests/models.py=utf-8 +encoding//web/hdabo/utils.py=utf-8 +encoding//web/hdabo/views.py=utf-8 +encoding//web/hdabo/wp_utils.py=utf-8 +encoding//web/hdalab/__init__.py=utf-8 +encoding//web/hdalab/config.py=utf-8 +encoding//web/hdalab/manage.py=utf-8 +encoding//web/hdalab/management/commands/export_tags_csv.py=utf-8 +encoding//web/hdalab/management/commands/export_wpcategory_csv.py=utf-8 +encoding//web/hdalab/management/commands/fill_country_codes.py=utf-8 +encoding//web/hdalab/management/commands/fill_tag_years.py=utf-8 +encoding//web/hdalab/management/commands/query_dbpedia.py=utf-8 +encoding//web/hdalab/management/commands/query_geo_inclusion.py=utf-8 +encoding//web/hdalab/management/commands/query_wikipedia_category.py=utf-8 +encoding//web/hdalab/management/utils.py=utf-8 +encoding//web/hdalab/migrations/0001_initial.py=utf-8 +encoding//web/hdalab/migrations/0002_dataviz.py=utf-8 +encoding//web/hdalab/migrations/0003_hdasession.py=utf-8 +encoding//web/hdalab/migrations/0005_rename_end_year.py=utf-8 +encoding//web/hdalab/migrations/0006_dbpedia_fields_one_to_one.py=utf-8 +encoding//web/hdalab/migrations/0007_geographic_inclusion.py=utf-8 +encoding//web/hdalab/models/dataviz.py=utf-8 +encoding//web/hdalab/settings.py=utf-8 +encoding//web/hdalab/urls.py=utf-8 +encoding//web/hdalab/views/ajax.py=utf-8 diff -r f316ba5856fe -r fde8335a037c virtualenv/res/lib/lib_create_env.py --- a/virtualenv/res/lib/lib_create_env.py Fri Feb 17 13:07:49 2012 +0100 +++ 
b/virtualenv/res/lib/lib_create_env.py Wed Feb 22 18:55:35 2012 +0100 @@ -23,6 +23,7 @@ 'WHOOSH' : { 'setup': 'Whoosh', 'url': 'https://bitbucket.org/mchaput/whoosh/get/tip.tar.bz2', 'local': 'whoosh-1.8.3.tar.bz2'}, 'WIKITOOLS' : { 'setup': 'wikitools', 'url': 'wikitools.tar.bz2', 'local': 'wikitools.tar.bz2'}, 'RDFLIB' : { 'setup': 'rdflib', 'url': 'http://rdflib.googlecode.com/files/rdflib-3.2.0.tar.gz', 'local': 'rdflib-3.2.0.tar.gz'}, + 'SPARQLWRAPPER' : { 'setup': 'SPARQLWrapper', 'url': 'http://downloads.sourceforge.net/project/sparql-wrapper/sparql-wrapper-python/1.5.0/SPARQLWrapper-1.5.0.tar.gz', 'local' : 'SPARQLWrapper-1.5.0.tar.gz'}, } if system_str == 'Windows': diff -r f316ba5856fe -r fde8335a037c virtualenv/res/src/SPARQLWrapper-1.5.0.tar.gz Binary file virtualenv/res/src/SPARQLWrapper-1.5.0.tar.gz has changed diff -r f316ba5856fe -r fde8335a037c virtualenv/web/res/res_create_env.py --- a/virtualenv/web/res/res_create_env.py Fri Feb 17 13:07:49 2012 +0100 +++ b/virtualenv/web/res/res_create_env.py Wed Feb 22 18:55:35 2012 +0100 @@ -35,6 +35,7 @@ ('WHOOSH', 'pip', None, None), ('HAYSTACK', 'pip', None, None), ('WIKITOOLS', 'pip', None, None), + ('SPARQLWRAPPER', 'pip', None, None), ]) if system_str == "Darwin": diff -r f316ba5856fe -r fde8335a037c web/hdalab/.htaccess --- a/web/hdalab/.htaccess Fri Feb 17 13:07:49 2012 +0100 +++ b/web/hdalab/.htaccess Wed Feb 22 18:55:35 2012 +0100 @@ -1,7 +1,7 @@ SetEnv DJANGO_SETTINGS_MODULE hdalab.settings -SetEnv PROJECT_PATH /Users/ymh/dev/workspace/hdabo/web -SetEnv PYTHON_PATH /Users/ymh/dev/workspace/hdabo/virtualenv/web/env/hdabo/lib/python2.6/site-packages:/Applications/Dev/eclipse/plugins/org.python.pydev.debug_2.2.0.2011062419/pysrc/ +SetEnv PROJECT_PATH /home/raph/shared/hdabo/web +SetEnv PYTHON_PATH /home/raph/venv_hdabo/env/lib/python2.7/site-packages SetEnv PYDEV_DEBUG False Options ExecCGI FollowSymLinks @@ -13,7 +13,7 @@ RewriteEngine On RewriteCond %{REQUEST_FILENAME} !-f -RewriteRule ^(.*)$ 
/~ymh/hdabo/hdalab/modwsgi.wsgi/$1 [QSA,PT,L] +RewriteRule ^(.*)$ /hda/hdalab/modwsgi.wsgi/$1 [QSA,PT,L] Header set Pragma "no-cache" Header set Cache-Control "no-cache" diff -r f316ba5856fe -r fde8335a037c web/hdalab/management/commands/fill_tag_years.py --- a/web/hdalab/management/commands/fill_tag_years.py Fri Feb 17 13:07:49 2012 +0100 +++ b/web/hdalab/management/commands/fill_tag_years.py Wed Feb 22 18:55:35 2012 +0100 @@ -31,7 +31,7 @@ start = int(range[0][1]) end = start if range[0][0]: - end = start + 9 + end = start + 10 if range[0][2]: end = int(range[0][2]) else: @@ -46,7 +46,7 @@ start = -100*val else: start = 1 + 100*(val-1) - end = start + 99 + end = start + 100 if start != None and end != None: ty = TagYears.objects.create(tag=tag, start_year=start, end_year=end)
diff -r f316ba5856fe -r fde8335a037c web/hdalab/management/commands/query_geo_inclusion.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/management/commands/query_geo_inclusion.py Wed Feb 22 18:55:35 2012 +0100
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Feb 22, 2012
+
+@author: raphv
+'''
+
+from django.core.management.base import NoArgsCommand
+from django.core.management.color import no_style
+from hdabo.utils import show_progress
+from hdabo.models import Tag
+from hdalab.models import Country, GeoInclusion
+from SPARQLWrapper import SPARQLWrapper, JSON
+import re
+
+class Command(NoArgsCommand):
+    '''
+    Link every "Localisation" tag that has a DBpedia URI to a country.
+
+    One SPARQL query per tag is sent to the DBpedia endpoint; when it
+    yields exactly one binding, the Country row is created if missing
+    and the tag's GeoInclusion row is created or updated.
+    '''
+
+    # Prefix of the DBpedia resource URIs accepted as country URIs.
+    resourceprefix = "http://dbpedia.org/resource/"
+    # Binding value for which the tag's own URI is stored as the country.
+    identityuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
+
+    def handle_noargs(self, **options):
+        '''
+        Query DBpedia for each localisation tag and store its country.
+
+        Progress is reported through show_progress after every tag.
+        '''
+        self.style = no_style()
+
+        qs = Tag.objects.filter(category__label="Localisation").exclude(dbpedia_uri = None)
+        total = qs.count()
+
+        endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
+        endpoint.setReturnFormat(JSON)
+        # NOTE(review): both triple patterns below show only a subject and
+        # an object variable; the predicate terms look like they were lost
+        # -- confirm against the repository before running this command.
+        sparqltext = """
+        SELECT ?resource WHERE {
+            { <%s> ?resource . }
+            UNION
+            { <%s> ?resource . }
+        }
+        """
+
+        writer = None
+
+        for i, tag in enumerate(qs):
+            endpoint.setQuery(sparqltext % (tag.dbpedia_uri, tag.dbpedia_uri))
+
+            results = endpoint.query().convert()['results']['bindings']
+
+            # Reset for every tag: otherwise a tag without a usable binding
+            # raises NameError or reports the previous tag's country.
+            countrytxt = ''
+
+            if len(results) == 1: # We don't want places located in multiple countries
+                countryuri, countrytxt = self._resolve_country(
+                    results[0]['resource']['value'], tag.dbpedia_uri)
+                if countryuri is not None:
+                    self._store_inclusion(tag, countryuri)
+
+            writer = show_progress(i+1, total, '%s => %s'%(tag.label, countrytxt), 50, writer)
+
+    def _resolve_country(self, resourceuri, taguri):
+        '''Return (country URI or None, text shown in the progress line).'''
+        if resourceuri.startswith(self.resourceprefix):
+            return resourceuri, resourceuri.rsplit('/', 1)[-1]
+        if resourceuri == self.identityuri:
+            return taguri, ''
+        return None, ''
+
+    def _store_inclusion(self, tag, countryuri):
+        '''Create the GeoInclusion of tag in the country, or update it.'''
+        country = Country.objects.get_or_create(dbpedia_uri=countryuri)[0]
+        # GeoInclusion.tag is one-to-one: look the row up by tag alone so a
+        # re-run that finds another country updates the row instead of
+        # violating the unique constraint on tag.
+        inclusion, created = GeoInclusion.objects.get_or_create(
+            tag=tag, defaults={'country': country})
+        if not created and inclusion.country_id != country.id:
+            inclusion.country = country
+            inclusion.save()
\ No newline at end of file
diff -r f316ba5856fe -r fde8335a037c web/hdalab/migrations/0007_geographic_inclusion.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/hdalab/migrations/0007_geographic_inclusion.py Wed Feb 22 18:55:35 2012 +0100
@@ -0,0 +1,145 @@
+# encoding: utf-8
+import datetime
+from south.db import db
+from south.v2 import SchemaMigration
+from django.db import models
+
+class Migration(SchemaMigration):
+    """Replace the CountryCode table with the Country and GeoInclusion tables."""
+
+    def forwards(self, orm):
+        """Drop hdalab_countrycode; create hdalab_geoinclusion and hdalab_country."""
+
+        # Deleting model 'CountryCode'
+        db.delete_table('hdalab_countrycode')
+
+        # NOTE(review): hdalab_geoinclusion references hdalab.Country before
+        # hdalab_country is created below; presumably South defers the FK -- confirm.
+        # Adding model 'GeoInclusion'
+        db.create_table('hdalab_geoinclusion', (
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('tag', self.gf('django.db.models.fields.related.OneToOneField')(related_name='locatedin', unique=True, to=orm['hdabo.Tag'])),
+            ('country', self.gf('django.db.models.fields.related.ForeignKey')(related_name='includes', to=orm['hdalab.Country'])),
+        ))
+        db.send_create_signal('hdalab', ['GeoInclusion'])
+
+        # Adding model 'Country'
+        db.create_table('hdalab_country', (
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('dbpedia_uri', self.gf('django.db.models.fields.URLField')(unique=True, max_length=255, db_index=True)),
+        ))
+        db.send_create_signal('hdalab', ['Country'])
+
+
+    def backwards(self, orm):
+        """Recreate an empty hdalab_countrycode; drop hdalab_geoinclusion and hdalab_country."""
+
+        # Adding model 'CountryCode'
+        db.create_table('hdalab_countrycode', (
+            ('isocode', self.gf('django.db.models.fields.CharField')(max_length=3, unique=True, db_index=True)),
+            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
+            ('label', self.gf('django.db.models.fields.CharField')(max_length=1024, unique=True, db_index=True)),
+        ))
+        db.send_create_signal('hdalab', ['CountryCode'])
+
+        # Deleting model 'GeoInclusion'
+        db.delete_table('hdalab_geoinclusion')
+
+        # Deleting model 'Country'
+        db.delete_table('hdalab_country')
+
+
+    # Model definitions; presumably the snapshot that the orm[...] lookups above resolve against.
+    models = {
+        'hdabo.tag': {
+            'Meta': {'unique_together': "(('label', 'original_label', 'url_status'),)", 'object_name': 'Tag'},
+            'alias': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'alternative_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}),
+            'alternative_wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}),
+            'alternative_wikipedia_url': ('django.db.models.fields.URLField', [], {'db_index': 'True', 'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagCategory']", 'null': 'True', 'blank': 'True'}),
+            'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
+            'dbpedia_uri': ('django.db.models.fields.URLField', [], {'db_index': 'True', 'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
+            'normalized_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}),
+            'original_label': ('django.db.models.fields.CharField', [], {'max_length': '1024'}),
+            'popularity': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}),
+            'url_status': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'db_index': 'True', 'blank': 'True'}),
+            'wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'db_index': 'True', 'null': 'True', 'blank': 'True'}),
+            'wikipedia_url': ('django.db.models.fields.URLField', [], {'db_index': 'True', 'max_length': '2048', 'null': 'True', 'blank': 'True'})
+        },
+        'hdabo.tagcategory': {
+            'Meta': {'object_name': 'TagCategory'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'})
+        },
+        'hdalab.country': {
+            'Meta': {'object_name': 'Country'},
+            'dbpedia_uri': ('django.db.models.fields.URLField', [], {'unique': 'True', 'max_length': '255', 'db_index': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'})
+        },
+        'hdalab.dbpediafields': {
+            'Meta': {'object_name': 'DbpediaFields'},
+            'abstract': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'dbpedia_uri': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'db_index': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}),
+            'tag': ('django.db.models.fields.related.OneToOneField', [], {'blank': 'True', 'related_name': "'dbpedia_fields'", 'unique': 'True', 'null': 'True', 'to': "orm['hdabo.Tag']"}),
+            'thumbnail': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'})
+        },
+        'hdalab.geoinclusion': {
+            'Meta': {'object_name': 'GeoInclusion'},
+            'country': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'includes'", 'to': "orm['hdalab.Country']"}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'tag': ('django.db.models.fields.related.OneToOneField', [], {'related_name': "'locatedin'", 'unique': 'True', 'to': "orm['hdabo.Tag']"})
+        },
+        'hdalab.hdasession': {
+            'Meta': {'object_name': 'HdaSession'},
+            'data': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'sessionid': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '36', 'db_index': 'True'})
+        },
+        'hdalab.infoboxparameter': {
+            'Meta': {'unique_together': "(('tag_infobox', 'param_name'),)", 'object_name': 'InfoboxParameter'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'param_name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+            'param_value': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'tag_infobox': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdalab.TagInfobox']"})
+        },
+        'hdalab.taginfobox': {
+            'Meta': {'unique_together': "(('tag', 'name', 'revision_id'),)", 'object_name': 'TagInfobox'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}),
+            'revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}),
+            'source': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'infoboxes'", 'to': "orm['hdabo.Tag']"})
+        },
+        'hdalab.taglinks': {
+            'Meta': {'object_name': 'TagLinks'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'object': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'taglinks_objects'", 'to': "orm['hdabo.Tag']"}),
+            'subject': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'taglinks_subjects'", 'to': "orm['hdabo.Tag']"})
+        },
+        'hdalab.tagwpcategory': {
+            'Meta': {'unique_together': "(('tag', 'wp_category', 'hidden'),)", 'object_name': 'TagWpCategory'},
+            'hidden': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'wp_categories'", 'to': "orm['hdabo.Tag']"}),
+            'wp_category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdalab.WpCategory']"})
+        },
+        'hdalab.tagyears': {
+            'Meta': {'object_name': 'TagYears'},
+            'end_year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}),
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'start_year': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}),
+            'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'years'", 'to': "orm['hdabo.Tag']"})
+        },
+        'hdalab.wpcategory': {
+            'Meta': {'object_name': 'WpCategory'},
+            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
+            'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '2048'})
+        }
+    }
+
+    complete_apps = ['hdalab']
diff -r f316ba5856fe -r fde8335a037c web/hdalab/models/__init__.py --- a/web/hdalab/models/__init__.py Fri Feb 17 13:07:49 2012 +0100 +++ b/web/hdalab/models/__init__.py Wed Feb 22 18:55:35 2012 +0100 @@ -1,5 +1,5 @@ from hdalab.models.categories import (WpCategory, InfoboxParameter, TagInfobox, TagWpCategory) -from hdalab.models.dataviz import TagYears, CountryCode, TagLinks, DbpediaFields, HdaSession +from hdalab.models.dataviz import 
TagYears, Country, GeoInclusion, TagLinks, DbpediaFields, HdaSession -__all__ = ['WpCategory', 'InfoboxParameter', 'TagInfobox', 'TagWpCategory', 'TagYears', 'CountryCode', 'TagLinks', 'DbpediaFields', 'HdaSession'] \ No newline at end of file +__all__ = ['WpCategory', 'InfoboxParameter', 'TagInfobox', 'TagWpCategory', 'TagYears', 'Country', 'GeoInclusion', 'TagLinks', 'DbpediaFields', 'HdaSession'] \ No newline at end of file diff -r f316ba5856fe -r fde8335a037c web/hdalab/models/dataviz.py --- a/web/hdalab/models/dataviz.py Fri Feb 17 13:07:49 2012 +0100 +++ b/web/hdalab/models/dataviz.py Wed Feb 22 18:55:35 2012 +0100 @@ -26,10 +26,25 @@ class Meta: app_label = 'hdalab' -class CountryCode(models.Model): +#class CountryCode(models.Model): +# +# label = models.CharField(max_length=1024, unique=True, blank=False, null=False, db_index=True) +# isocode = models.CharField(max_length=3, unique=True, blank=False, null=False, db_index=True) +# +# class Meta: +# app_label = 'hdalab' - label = models.CharField(max_length=1024, unique=True, blank=False, null=False, db_index=True) - isocode = models.CharField(max_length=3, unique=True, blank=False, null=False, db_index=True) +class Country(models.Model): + + dbpedia_uri = models.URLField(verify_exists=False, max_length=255, blank=False, null=False, db_index=True, unique=True) + + class Meta: + app_label = 'hdalab' + +class GeoInclusion(models.Model): + + tag = models.OneToOneField(Tag, related_name="locatedin", db_index=True) + country = models.ForeignKey(Country, blank=False, null=False, related_name="includes", db_index=True) class Meta: app_label = 'hdalab' diff -r f316ba5856fe -r fde8335a037c web/hdalab/views/ajax.py --- a/web/hdalab/views/ajax.py Fri Feb 17 13:07:49 2012 +0100 +++ b/web/hdalab/views/ajax.py Wed Feb 22 18:55:35 2012 +0100 @@ -9,7 +9,7 @@ from django.db.models import Q, Count, Sum from django.http import HttpResponse from hdabo.models import Tag, Datasheet, TaggedSheet -from hdalab.models import TagLinks, 
HdaSession, CountryCode, TagYears +from hdalab.models import TagLinks, HdaSession, Country, GeoInclusion, TagYears import django.utils.simplejson as json import hmac import uuid @@ -101,13 +101,10 @@ if label or periode or contentlist : - + qs = Datasheet.objects.filter(validated=True) - - #$globalsql = "SELECT id, title, description, url FROM hdabo_datasheet E"; globalids = [] - #$globalfilters = array(); if periode: years = periode.split(",") @@ -124,7 +121,7 @@ globalids += [t.id for t in tagqs] - qs = qs.filter(taggedsheet__tag__in = tagqs) + qs = qs.filter(taggedsheet__tag__in = tagqs) if label: for txtlbl in label.split(","): @@ -138,95 +135,105 @@ if contentlist: qs = qs.filter(id__in = contentlist.split(",")) - if contentlist is None: - qs = qs.order_by('?') +# if contentlist is None: +# qs = qs.order_by('?') + qs = qs.distinct() - for content in qs: - cont_count += 1 - contenus[content.id] = {'score' : 0, 'tags' : [], 'id':content.id, 'title': content.title, 'description': content.description, 'url': content.url} + contenus = dict([(content.id, {'score' : 0, 'tags' : [], 'id':content.id, 'title': content.title, 'description': content.description, 'url': content.url}) for content in qs]) - - countries = dict([ (cc.label, {'isocode':cc.isocode, 'score':0}) for cc in CountryCode.objects.all() ]) - disciplines = dict([ (d.label, {'label':d.label, 'score':0}) for d in Tag.objects.filter(category__label = "Discipline artistique")]) + #countries = dict([ (cc.label, {'isocode':cc.isocode, 'score':0}) for cc in CountryCode.objects.all() ]) + #disciplines = dict([ (d.label, {'label':d.label, 'score':0}) for d in Tag.objects.filter(category__label = "Discipline artistique")]) + + countries = {} + disciplines = {} tags = {} contentids = contenus.keys() - qs = TaggedSheet.objects.select_related('tag').filter(datasheet__in = contentids, order__lte = max_tag_order).order_by('order') + cont_count = len(contentids) + + qs = 
TaggedSheet.objects.select_related('tag','tag__category','tag__locatedin__country').filter(datasheet__in = contentids, order__lte = max_tag_order).order_by('order') for ts in qs: match_tag = ts.tag.id in globalids - contenus[ts.datasheet.id]['tags'].append({'id': ts.tag.id, 'label':ts.tag.label, 'order':ts.order, 'match': match_tag}) + contenutags = {'id': ts.tag.id, 'label':ts.tag.label, 'order':ts.order, 'match': match_tag } + +# if ts.tag.category is not None: +# contenutags['category'] = ts.tag.category.label + + contenus[ts.datasheet.id]['tags'].append(contenutags) + tagscore = 2*max_tag_order - ts.order - if ts.tag.id not in tags: - tags[ts.tag.id] = {'id':ts.tag.id,'label':ts.tag.label, 'score':0} - tags[ts.tag.id]['score'] += tagscore + if ts.tag.category is not None and ts.tag.category.label not in [u'Discipline artistique', u'Datation', u'Localisation']: + if ts.tag.id not in tags: + tags[ts.tag.id] = {'id':ts.tag.id,'label':ts.tag.label, 'score':0} + tags[ts.tag.id]['score'] += 1 + if match_tag: + tags[ts.tag.id]['match'] = True + if match_tag: contenus[ts.datasheet.id]['score'] += tagscore - tags[ts.tag.id]['match'] = True - if ts.tag.label in countries: - countries[ts.tag.label]['score'] += tagscore - if ts.tag.label in disciplines: - disciplines[ts.tag.label]['score'] += tagscore - - content_count = content_count if not contentlist else len(contenus) - contenus = sorted(contenus.values(),key=lambda e: e.get('score', 0))[0:content_count] + + if ts.tag.category is not None and ts.tag.category.label == u'Discipline artistique': + if ts.tag.label not in disciplines: + disciplines[ts.tag.label] = {'label':ts.tag.label, 'score':0} + disciplines[ts.tag.label]['score'] += 1 + + if ts.tag.locatedin is not None: + country_id = ts.tag.locatedin.country.id + if country_id not in countries: + countries[country_id] = {'id':country_id, 'dbpedia_uri':ts.tag.locatedin.country.dbpedia_uri, 'score': 0} + countries[country_id]['score'] += 1 + + if contentlist is None: + 
contenus = sorted(contenus.values(),key=lambda e: -e['score'])[0:content_count] + contenus = [contenu for contenu in contenus if contenu['score']] + countries = countries.values() - tags = sorted(tags.values(), key=lambda e: e.get('score', 0))[0:tag_count] - disciplines = sorted(disciplines.values(), key=lambda e: e.get('score', 0))[0:10] + tags = sorted(tags.values(), key=lambda e: -e['score'])[0:tag_count] + disciplines = sorted(disciplines.values(), key=lambda e: -e['score'])[0:10] years = {} + if contentids: - qs = TagYears.objects.values('start_year', 'end_year').annotate(order_count=Count('tag__taggedsheet'), order_sum=Sum("tag__taggedsheet__order")).filter(tag__taggedsheet__order__lte=max_tag_order, tag__taggedsheet__datasheet__in = contentids) + qs = TagYears.objects.filter(tag__taggedsheet__datasheet__in = contentids).annotate(nb=Count('tag__taggedsheet')) for ty in qs: - for year in range(ty['start_year'], ty['end_year']): - years[year] = (2*max_tag_order*ty['order_count']-ty['order_sum'])/(ty['end_year']-ty['start_year']) + years[year] if year in years else 0 -# $rq = pg_query("SELECT U.start_year, U.end_year, SUM(".(2*$max_tag_order)." - V.order)/(U.end_year + 1 - U.start_year) score FROM hdaviz_years U, hdabo_taggedsheet V WHERE U.tag_id = V.tag_id AND V.order <= $max_tag_order AND V.datasheet_id IN ($contentids) GROUP BY U.start_year, U.end_year"); -# while($ligne = pg_fetch_row($rq)) { -# foreach(range($ligne[0], $ligne[1]) as $year) { -# $years[$year] = $ligne[2] + ( isset($years[$year]) ? 
$years[$year] : 0 ); + for year in range(ty.start_year, ty.end_year): + years[year] = ty.nb + years[year] if year in years else 0 else: for ds in Datasheet.objects.order_by("?")[:content_count]: contenus[ds.id] = {'id':ds.id, 'title':ds.title, 'description':ds.description, 'url':ds.url, 'tags':[]} + cont_count = Datasheet.objects.count() qs = TaggedSheet.objects.select_related('tag','datasheet').filter(datasheet__id__in = contenus.keys(), order__lte = max_tag_order).order_by("order").only('order','tag__label','tag__id','datasheet__id') for ts in qs: contenus[ts.datasheet.id]['tags'].append({'id': ts.tag.id, 'label':ts.tag.label, 'order':ts.order}) contenus = contenus.values() + + qs = Tag.objects.exclude(category__label = u"Localisation").exclude(category__label = u"Datation").exclude(category__label = u"Discipline artistice").annotate(nb=Count('datasheet')).order_by('-nb')[:tag_count] + tags = [{'id': tag.id, 'label': tag.label, 'score': tag.nb} for tag in qs] + + qs = Country.objects.annotate(nb=Count('includes__tag__taggedsheet')).order_by('-nb') + countries = [{'id': country.id, 'dbpedia_uri': country.dbpedia_uri, 'score': country.nb} for country in qs] - cursor = connection.cursor() #@UndefinedVariable - try: - cursor.execute("select t.id, t.label, sum(2*%s-ts.order) as score from hdabo_tag as t join hdabo_taggedsheet as ts on t.id = ts.tag_id where ts.order < %s group by t.id, t.label order by score limit %s",[max_tag_order, max_tag_order, tag_count]) - tags = [{'id': t[0], 'label':t[1], 'score':t[2]} for t in cursor.fetchall()] - #tags = sorted([{'id':tag.id,'label':tag.label,'score':2*max_tag_order*tag.count_score - tag.sum_score} for tag in qs], key=lambda t:t['score'])[:tag_count] - finally: - cursor.close() - cursor = connection.cursor() #@UndefinedVariable - try: - cursor.execute("select c.isocode as isocode, sum(2*%s-ts.order) as score from hdabo_tag as t join hdabo_taggedsheet as ts on t.id = ts.tag_id join hdalab_countrycode as c on t.label = c.label 
group by c.isocode", [max_tag_order]) - countries = [{'isocode': r[0], 'score':r[1]} for r in cursor.fetchall()] - - finally: - cursor.close() - - qs = Tag.objects.annotate(count_score=Count('taggedsheet'),sum_score=Sum('taggedsheet__order')).filter(taggedsheet__order__lte = max_tag_order, category__label__iexact = u"Discipline") - disciplines = sorted([{'label':tag.label,'score':2*max_tag_order*tag.count_score - tag.sum_score} for tag in qs], key=lambda t:t.score, reverse=True)[:10] + qs = Tag.objects.annotate(nb=Count('taggedsheet')).filter(category = 5).order_by('-nb')[:10] + disciplines = [{'label':tag.label,'score':tag.nb} for tag in qs] years = {} - qs = TagYears.objects.values('start_year', 'end_year').annotate(order_count=Count('tag__taggedsheet'), order_sum=Sum("tag__taggedsheet__order")).filter(tag__taggedsheet__order__lte=max_tag_order) + qs = TagYears.objects.annotate(nb=Count('tag__taggedsheet')) for ty in qs: - for year in range(ty['start_year'], ty['end_year']): - years[year] = (2*max_tag_order*ty['order_count']-ty['order_sum'])/(ty['end_year']-ty['start_year']) + years[year] if year in years else 0 + for year in range(ty.start_year, ty.end_year): + years[year] = ty.nb + years[year] if year in years else 0 yearchange = [] for year in sorted(years.keys()): score = years[year] if year < 2011: - if year-1 not in years or years[year-1] != score: + if (year-1 not in years and score != 0) or (year-1 in years and years[year-1] != score): yearchange.append({'year': year, 'score': score}) - if year+1 not in years and year != -1: + if year+1 not in years and year != -1 and score != 0: yearchange.append({'year': year+1, 'score': 0}) output = {'count': cont_count, 'contents': contenus, 'tags':tags, 'sparkline':yearchange, 'countries':countries, 'disciplines':disciplines}