# HG changeset patch # User veltr # Date 1327944059 -3600 # Node ID 0f9cc90c49fa787f8d847737ef798587b2df9601 # Parent 46c0f7a935d1f75ab27f94fc9df3d203ff57589f# Parent c59383cc9940dc2384d58279273b4f4f13452f8d Merge with c59383cc9940dc2384d58279273b4f4f13452f8d diff -r 46c0f7a935d1 -r 0f9cc90c49fa .hgignore --- a/.hgignore Mon Jan 30 18:20:21 2012 +0100 +++ b/.hgignore Mon Jan 30 18:20:59 2012 +0100 @@ -18,4 +18,6 @@ syntax: regexp ^virtualenv/res/src/south$ syntax: regexp -\.sh$ \ No newline at end of file +\.sh$ +syntax: regexp +^web/hdalab/config\.py$ \ No newline at end of file diff -r 46c0f7a935d1 -r 0f9cc90c49fa .settings/org.eclipse.core.resources.prefs --- a/.settings/org.eclipse.core.resources.prefs Mon Jan 30 18:20:21 2012 +0100 +++ b/.settings/org.eclipse.core.resources.prefs Mon Jan 30 18:20:59 2012 +0100 @@ -1,23 +1,30 @@ -#Mon Jan 16 02:39:01 CET 2012 +#Fri Jan 27 15:11:21 CET 2012 eclipse.preferences.version=1 encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/haystack/backends/__init__.py=utf-8 encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/fields.py=utf-8 encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/forms.py=utf-8 encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/sortedm2m/tests.py=utf-8 encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/whoosh/analysis.py=utf8 +encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/api.py=utf-8 encoding//virtualenv/web/env/hdabo/lib/python2.6/site-packages/wikitools/wiki.py=utf-8 encoding//web/hdabo/forms.py=utf-8 encoding//web/hdabo/management/commands/import_csv.py=utf-8 encoding//web/hdabo/management/commands/import_tag_popularity.py=utf-8 encoding//web/hdabo/management/commands/query_wikipedia.py=utf-8 -encoding//web/hdabo/management/commands/query_wikipedia_category.py=utf-8 encoding//web/hdabo/migrations/0001_initial.py=utf-8 encoding//web/hdabo/migrations/0002_backport_hdabo_sf.py=utf-8 
encoding//web/hdabo/migrations/0003_update_redirection.py=utf-8 -encoding//web/hdabo/migrations/0004_wp_category.py=utf-8 encoding//web/hdabo/models.py=utf-8 encoding//web/hdabo/search/french_whoosh_backend.py=utf-8 encoding//web/hdabo/tests/models.py=utf-8 encoding//web/hdabo/utils.py=utf-8 encoding//web/hdabo/views.py=utf-8 encoding//web/hdabo/wp_utils.py=utf-8 +encoding//web/hdalab/__init__.py=utf-8 +encoding//web/hdalab/config.py=utf-8 +encoding//web/hdalab/manage.py=utf-8 +encoding//web/hdalab/management/commands/export_tags_csv.py=utf-8 +encoding//web/hdalab/management/commands/query_wikipedia_category.py=utf-8 +encoding//web/hdalab/migrations/0001_initial.py=utf-8 +encoding//web/hdalab/settings.py=utf-8 +encoding//web/hdalab/urls.py=utf-8 diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdabo/management/commands/query_wikipedia_category.py --- a/web/hdabo/management/commands/query_wikipedia_category.py Mon Jan 30 18:20:21 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,362 +0,0 @@ -# -*- coding: utf-8 -*- -''' -Created on Jun 7, 2011 - -@author: ymh -''' - -from django.conf import settings -from django.core.management.base import NoArgsCommand -from django.core.management.color import no_style -from hdabo.models import Tag, WpCategory, TagWpCategory, TagInfobox, InfoboxParameter -from optparse import make_option -from wikitools import api,wiki -import sys -import re -import itertools -from hdabo import utils -from django.db.models import Count -from django.db import transaction - - -TYPES_MASK_DICT = { - u'visible': 0b001, - u'hidden': 0b010, - u'infobox': 0b100, - u'all': 0b111, - } - -START_PATTERN = re.compile(u"\{\{\s?Infobox\s+([^|]+)", re.M|re.U|re.I) -END_PATTERN = re.compile(u"\{\{|\}\}", re.M|re.U) -SPLIT_PATTERN = re.compile("\s*?\|\s*([\w]+[\w \t-]*)\s*=", re.U|re.M) -DELIMITER_PATTERN = re.compile("\{{2,3}|\}{2,3}|\[\[|\]\]|\[|\]") - - - -class Command(NoArgsCommand): - ''' - query and update wikipedia for tag title. 
- ''' - options = '' - help = """query and update wikipedia for tag title.""" - - option_list = NoArgsCommand.option_list + ( - make_option('--all', - action='store_true', - dest='all', - default=False, - help='force all tags to be updated, not only those not yet processed'), - make_option('--force', - action='store_true', - dest='force', - default=False, - help='ask no questions'), - make_option('--random', - action='store_true', - dest='random', - default=False, - help='randomize query on tags'), - make_option('--site', - action='store', - type='string', - dest='site_url', - default="http://fr.wikipedia.org/w/api.php", - help='the url for the wikipedia site'), - make_option('--limit', - action='store', - type='int', - dest='limit', - default= -1, - help='number of tag to process'), - make_option('--start', - action='store', - type='int', - dest='start', - default=0, - help='number of tag to ignore'), - make_option('--type', - action='append', - dest='types', - type='choice', - choices=['visible','hidden', 'infobox', 'all'], - default=[], - help='what type of query to oerform : visible : visible categories, hidden : hidden categories, infobox: infoboxes, all: all of them. 
This option can be assed multiple times' - ), - ) - - -# def process_wp_response(self, label, response): -# -# query_dict = response['query'] -# # get page if multiple pages or none -> return Tag.null_result -# pages = query_dict.get("pages", {}) -# if len(pages) > 1 or len(pages) == 0: -# return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None -# -# page = pages.values()[0] -# -# if u"invalid" in page or u"missing" in page: -# return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None -# -# url = page.get(u'fullurl', None) -# pageid = page.get(u'pageid', None) -# new_label = page[u'title'] -# -# if self.__is_homonymie(page): -# status = Tag.TAG_URL_STATUS_DICT["homonyme"] -# elif u"redirect" in page: -# status = Tag.TAG_URL_STATUS_DICT["redirection"] -# else: -# status = Tag.TAG_URL_STATUS_DICT["match"] -# -# return new_label, status, url, pageid - - def query_all_categories(self, hidden, site, pageid): - - clshow = 'hidden' if hidden else '!hidden' - params = {'action':'query', 'pageids': pageid, 'prop':'categories', 'clshow': clshow} - - clcontinue = "" - res = [] - - while clcontinue is not None: - if clcontinue: - params['clcontinue'] = clcontinue - - wpquery = api.APIRequest(site, params) #@UndefinedVariable - response = wpquery.query() - - query_dict = response.get('query', None) - - if query_dict is None: - return res - - pages = query_dict.get("pages", {}) - if len(pages) > 1 or len(pages) == 0: - return res - - page = pages.values()[0] - - for cat in page.get('categories',[]): - title = cat.get('title',"") - title = title[title.find(":")+1:] - if title and clcontinue != ("%s|%s" % (pageid,title)): - res.append(title) - - clcontinue = response.get('query-continue', {}).get('categories',{}).get('clcontinue', None) - - return res - - def process_categories(self, cat_list, hidden, tag): - - for cat in cat_list: - wp_cat,created = WpCategory.objects.get_or_create(label=cat) #@UnusedVariable - TagWpCategory.objects.get_or_create(tag=tag, 
wp_category=wp_cat, hidden=hidden) - - - def query_infoboxes(self, site, pageid): - - res = [] - params = {'action':'query', 'pageids': pageid, 'prop':'revisions', 'rvprop': 'ids|content'} - wpquery = api.APIRequest(site, params) #@UndefinedVariable - response = wpquery.query() - - query_dict = response.get('query', None) - - if query_dict is None: - return res - - pages = query_dict.get("pages", {}) - if len(pages) > 1 or len(pages) == 0: - return res - - page = pages.values()[0] - - if 'revisions' not in page or not page['revisions']: - return res - - rev = page['revisions'][0] - - content = rev['*'] - - start = 0 - depth = 0 - current_infobox_name = None - current_start = 0 - - while start <= len(content): - if depth==0: - resm = START_PATTERN.search(content[start:]) - if resm is None: - break - depth = 1 - current_start = resm.start()+start - start += resm.end()+1 - current_infobox_name = resm.group(1) - else: - resm = END_PATTERN.search(content[start:]) - if resm is None: - break - if resm.group(0) == "{{": - depth += 1 - elif resm.group(0) == "}}": - depth -= 1 - if depth == 0: - res.append((content[current_start:resm.end()+start], current_infobox_name)) - start += resm.end()+1 - - - return rev['revid'],res - - def split_infoboxes(self, src): - - start = 0 - previous_end = 0 - split_indexes = [] - delimiter_stack = [] - while start<=len(src): - resd = DELIMITER_PATTERN.search(src[start:]) - ress = SPLIT_PATTERN.search(src[start:]) if len(delimiter_stack) == 0 else None - startd = resd.start() if resd is not None else sys.maxint - starts = ress.start() if ress is not None else sys.maxint - if starts < startd: - if len(split_indexes)>0: - split_indexes.append((previous_end, ress.start(0)+start)) - split_indexes.append((ress.start(1)+start, ress.end(1)+start)) - start += ress.end(0) - previous_end = start - elif startd < sys.maxint: - if resd.group().startswith("{") or resd.group().startswith("[") : - delimiter_stack.append(resd.group()) - elif 
len(delimiter_stack)>0 and ( (delimiter_stack[-1].startswith('{') and resd.group()[0] == '}') or (delimiter_stack[-1].startswith('[') and resd.group()[0] == ']') ) and len(delimiter_stack[-1]) == len(resd.group()): - delimiter_stack.pop() - start += resd.end() - else: - break - - if previous_end > 0: - split_indexes.append((previous_end,len(src))) - res = [src[start:end] for start,end in split_indexes] - return res - - - - def process_infoboxes(self, infobox_defs, tag): - - if not infobox_defs: - return - - revision_id = infobox_defs[0] - for infobox in infobox_defs[1]: - src = infobox[0].strip(' \t\n\r') - name = infobox[1] - tag_infobox, created = TagInfobox.objects.get_or_create(tag=tag, name=name, revision_id = revision_id, defaults={'source': src}) - if not created: - tag_infobox.source = src - tag_infobox.save() - - src = START_PATTERN.sub('',src[:-2]).strip() - keyvalues = self.split_infoboxes(src) - - for key,value in itertools.izip(*[itertools.islice(keyvalues, i, None, 2) for i in range(2)]): - param, created = InfoboxParameter.objects.get_or_create(tag_infobox=tag_infobox, param_name=key.strip(), defaults={'param_value':value.strip()}) - if not created: - param.param_value = value.strip() - param.save() - - def handle_noargs(self, **options): - - self.style = no_style() - - interactive = options.get('interactive', True) - - verbosity = int(options.get('verbosity', '1')) - - force = options.get('force', False) - - limit = options.get("limit", -1) - start = options.get("start", 0) - - site_url = options.get('site_url', settings.WIKIPEDIA_API_URL) - - random = options.get('random', False) - - types_mask = 0 - types_list = options.get('types', []) - - if len(types_list) == 0: - types_mask = TYPES_MASK_DICT['all'] - else: - for t in types_list: - types_mask |= TYPES_MASK_DICT[t] - - if verbosity > 1 : - print "types mask %s " % (bin(types_mask)) - - if verbosity > 2: - print "option passed : " + repr(options) - - - queryset = 
Tag.objects.exclude(wikipedia_pageid= None) - - if not options.get('all',False): - queryset = queryset.annotate(wpc=Count('wp_categories')).filter(wpc = 0) - #else: - # queryset = Tag.objects.filter(url_status=None) - - if random: - queryset = queryset.order_by("?") - else: - queryset = queryset.order_by("label") - - if limit >= 0: - queryset = queryset[start:limit] - elif start > 0: - queryset = queryset[start:] - - if verbosity > 2 : - print "Tag Query is %s" % (queryset.query) - - site = wiki.Wiki(site_url) #@UndefinedVariable - - - count = queryset.count() - if verbosity > 1: - print "Processing %d tags" % (count) - - if not force and interactive: - confirm = raw_input("You have requested to query and replace the wikipedia information for %d tags.\n Are you sure you want to do this? \nType 'yes' to continue, or 'no' to cancel: " % (count)) - else: - confirm = 'yes' - - if confirm != "yes": - print "wikipedia query cancelled" - return - - - - for i, tag in enumerate(queryset): - - if verbosity > 1: - print "processing tag %s (%d/%d)" % (tag.label, i + 1, count) - else: - utils.show_progress(i + 1, count, tag.label, 60) - - # query categories - wikipedia_pageid = tag.wikipedia_pageid - if tag.url_status == Tag.TAG_URL_STATUS_DICT['redirection'] and tag.alternative_wikipedia_pageid is not None : - wikipedia_pageid = tag.alternative_wikipedia_pageid - - with transaction.commit_on_success(): - if types_mask & TYPES_MASK_DICT['visible']: - res = self.query_all_categories(False, site, wikipedia_pageid) - self.process_categories(res, False, tag) - - if types_mask & TYPES_MASK_DICT['hidden']: - res = self.query_all_categories(True, site, wikipedia_pageid) - self.process_categories(res, True, tag) - - if types_mask & TYPES_MASK_DICT['infobox']: - res = self.query_infoboxes(site, wikipedia_pageid) - self.process_infoboxes(res, tag) - diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdabo/migrations/0003_update_redirection.py --- a/web/hdabo/migrations/0003_update_redirection.py 
Mon Jan 30 18:20:21 2012 +0100 +++ b/web/hdabo/migrations/0003_update_redirection.py Mon Jan 30 18:20:59 2012 +0100 @@ -1,8 +1,5 @@ # encoding: utf-8 -import datetime -from south.db import db from south.v2 import DataMigration -from django.db import models from hdabo import utils, wp_utils import sys from wikitools import wiki @@ -15,7 +12,7 @@ sys.stdout.write("Processing %d tags\n" % (queryset_count)) sys.stdout.flush() - site = wiki.Wiki("http://fr.wikipedia.org/w/api.php") + site = wiki.Wiki("http://fr.wikipedia.org/w/api.php") #@UndefinedVariable for i,tag in enumerate(queryset): diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdabo/migrations/0004_wp_category.py --- a/web/hdabo/migrations/0004_wp_category.py Mon Jan 30 18:20:21 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,277 +0,0 @@ -# encoding: utf-8 -import datetime -from south.db import db -from south.v2 import SchemaMigration -from django.db import models - -class Migration(SchemaMigration): - - def forwards(self, orm): - - # Adding model 'TagWpCategory' - db.create_table('hdabo_tagwpcategory', ( - ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), - ('tag', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['hdabo.Tag'])), - ('wp_category', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['hdabo.WpCategory'])), - ('hidden', self.gf('django.db.models.fields.BooleanField')(default=False)), - )) - db.send_create_signal('hdabo', ['TagWpCategory']) - - # Adding model 'WpCategory' - db.create_table('hdabo_wpcategory', ( - ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), - ('label', self.gf('django.db.models.fields.CharField')(unique=True, max_length=2048)), - )) - db.send_create_signal('hdabo', ['WpCategory']) - - # Adding model 'InfoboxParameter' - db.create_table('hdabo_infoboxparameter', ( - ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), - ('tag_infobox', 
self.gf('django.db.models.fields.related.ForeignKey')(to=orm['hdabo.TagInfobox'])), - ('param_name', self.gf('django.db.models.fields.CharField')(max_length=2048)), - ('param_value', self.gf('django.db.models.fields.TextField')(null=True, blank=True)), - )) - db.send_create_signal('hdabo', ['InfoboxParameter']) - - # Adding model 'TagInfobox' - db.create_table('hdabo_taginfobox', ( - ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), - ('tag', self.gf('django.db.models.fields.related.ForeignKey')(related_name='infoboxes', to=orm['hdabo.Tag'])), - ('name', self.gf('django.db.models.fields.CharField')(max_length=2048)), - ('source', self.gf('django.db.models.fields.TextField')(null=True, blank=True)), - ('revision_id', self.gf('django.db.models.fields.BigIntegerField')(null=True, blank=True)), - )) - db.send_create_signal('hdabo', ['TagInfobox']) - - - def backwards(self, orm): - - # Deleting model 'TagWpCategory' - db.delete_table('hdabo_tagwpcategory') - - # Deleting model 'WpCategory' - db.delete_table('hdabo_wpcategory') - - # Deleting model 'InfoboxParameter' - db.delete_table('hdabo_infoboxparameter') - - # Deleting model 'TagInfobox' - db.delete_table('hdabo_taginfobox') - - - models = { - 'auth.group': { - 'Meta': {'object_name': 'Group'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}), - 'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}) - }, - 'auth.permission': { - 'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'}, - 'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}), - 'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}), - 
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'name': ('django.db.models.fields.CharField', [], {'max_length': '50'}) - }, - 'auth.user': { - 'Meta': {'object_name': 'User'}, - 'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), - 'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}), - 'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), - 'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), - 'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), - 'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), - 'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), - 'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), - 'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}), - 'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}), - 'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'}) - }, - 'contenttypes.contenttype': { - 'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"}, - 'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}), - 'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}) - }, - 
'hdabo.author': { - 'Meta': {'object_name': 'Author'}, - 'firstname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}), - 'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'lastname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}) - }, - 'hdabo.datasheet': { - 'Meta': {'object_name': 'Datasheet'}, - 'author': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Author']", 'null': 'True', 'blank': 'True'}), - 'college_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_college_periods']", 'to': "orm['hdabo.TimePeriod']"}), - 'college_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_college_themes']", 'to': "orm['hdabo.Domain']"}), - 'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), - 'domains': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_domains']", 'to': "orm['hdabo.Domain']"}), - 'format': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.DocumentFormat']", 'null': 'True', 'blank': 'True'}), - 'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}), - 'highschool_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_highschool_periods']", 'to': "orm['hdabo.TimePeriod']"}), - 'highschool_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': 
"'highschool_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_highschool_themes']", 'to': "orm['hdabo.Domain']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'manual_order': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), - 'modification_datetime': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}), - 'organisation': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Organisation']"}), - 'original_creation_date': ('django.db.models.fields.DateField', [], {}), - 'original_modification_date': ('django.db.models.fields.DateField', [], {}), - 'primary_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_primary_periods']", 'to': "orm['hdabo.TimePeriod']"}), - 'primary_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_primary_themes']", 'to': "orm['hdabo.Domain']"}), - 'tags': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['hdabo.Tag']", 'through': "orm['hdabo.TaggedSheet']", 'symmetrical': 'False'}), - 'title': ('django.db.models.fields.CharField', [], {'max_length': '2048'}), - 'town': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Location']", 'null': 'True', 'blank': 'True'}), - 'url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), - 'validated': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), - 'validation_date': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), - 'validator': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']", 'null': 'True', 'blank': 'True'}) - }, - 
'hdabo.datasheet_college_periods': { - 'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_periods'}, - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'sort_value': ('django.db.models.fields.IntegerField', [], {}), - 'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"}) - }, - 'hdabo.datasheet_college_themes': { - 'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_themes'}, - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'sort_value': ('django.db.models.fields.IntegerField', [], {}) - }, - 'hdabo.datasheet_domains': { - 'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_domains'}, - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'sort_value': ('django.db.models.fields.IntegerField', [], {}) - }, - 'hdabo.datasheet_highschool_periods': { - 'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_periods'}, - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'sort_value': ('django.db.models.fields.IntegerField', [], {}), - 'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"}) - }, - 'hdabo.datasheet_highschool_themes': { - 'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_themes'}, 
- 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'sort_value': ('django.db.models.fields.IntegerField', [], {}) - }, - 'hdabo.datasheet_primary_periods': { - 'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_periods'}, - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'sort_value': ('django.db.models.fields.IntegerField', [], {}), - 'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"}) - }, - 'hdabo.datasheet_primary_themes': { - 'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_themes'}, - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'sort_value': ('django.db.models.fields.IntegerField', [], {}) - }, - 'hdabo.documentformat': { - 'Meta': {'object_name': 'DocumentFormat'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}) - }, - 'hdabo.domain': { - 'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'Domain'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}), - 'school_period': ('django.db.models.fields.IntegerField', [], {}) - }, - 'hdabo.infoboxparameter': { - 'Meta': {'object_name': 'InfoboxParameter'}, - 'id': ('django.db.models.fields.AutoField', [], 
{'primary_key': 'True'}), - 'param_name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}), - 'param_value': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), - 'tag_infobox': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagInfobox']"}) - }, - 'hdabo.location': { - 'Meta': {'object_name': 'Location'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'insee': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '5'}), - 'name': ('django.db.models.fields.CharField', [], {'max_length': '512'}) - }, - 'hdabo.organisation': { - 'Meta': {'object_name': 'Organisation'}, - 'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'location': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}), - 'name': ('django.db.models.fields.CharField', [], {'max_length': '512'}), - 'website': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}) - }, - 'hdabo.tag': { - 'Meta': {'unique_together': "(('label', 'original_label', 'url_status'),)", 'object_name': 'Tag'}, - 'alias': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}), - 'alternative_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}), - 'alternative_wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}), - 'alternative_wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), - 'category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagCategory']", 'null': 'True', 'blank': 'True'}), - 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 
'True'}), - 'dbpedia_uri': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}), - 'normalized_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}), - 'original_label': ('django.db.models.fields.CharField', [], {'max_length': '1024'}), - 'popularity': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}), - 'url_status': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}), - 'wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}), - 'wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), - 'wp_categories': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['hdabo.WpCategory']", 'through': "orm['hdabo.TagWpCategory']", 'symmetrical': 'False'}) - }, - 'hdabo.tagcategory': { - 'Meta': {'object_name': 'TagCategory'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}) - }, - 'hdabo.taggedsheet': { - 'Meta': {'object_name': 'TaggedSheet'}, - 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'index_note': ('django.db.models.fields.FloatField', [], {'default': '0.0', 'db_index': 'True'}), - 'order': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}), - 'original_order': ('django.db.models.fields.IntegerField', [], {'default': '0'}), - 'tag': 
('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Tag']"}), - 'wikipedia_revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}) - }, - 'hdabo.taginfobox': { - 'Meta': {'object_name': 'TagInfobox'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}), - 'revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}), - 'source': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), - 'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'infoboxes'", 'to': "orm['hdabo.Tag']"}) - }, - 'hdabo.tagwpcategory': { - 'Meta': {'object_name': 'TagWpCategory'}, - 'hidden': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'tag': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Tag']"}), - 'wp_category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.WpCategory']"}) - }, - 'hdabo.timeperiod': { - 'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'TimePeriod'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}), - 'school_period': ('django.db.models.fields.IntegerField', [], {}) - }, - 'hdabo.wpcategory': { - 'Meta': {'object_name': 'WpCategory'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '2048'}) - } - } - - complete_apps = ['hdabo'] diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdabo/migrations/0005_wp_db_indexes.py --- a/web/hdabo/migrations/0005_wp_db_indexes.py Mon Jan 30 18:20:21 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,248 +0,0 @@ -# 
encoding: utf-8 -import datetime -from south.db import db -from south.v2 import SchemaMigration -from django.db import models - -class Migration(SchemaMigration): - - def forwards(self, orm): - - # Adding unique constraint on 'TagWpCategory', fields ['wp_category', 'hidden', 'tag'] - db.create_unique('hdabo_tagwpcategory', ['wp_category_id', 'hidden', 'tag_id']) - - # Adding unique constraint on 'InfoboxParameter', fields ['param_name', 'tag_infobox'] - db.create_unique('hdabo_infoboxparameter', ['param_name', 'tag_infobox_id']) - - # Adding unique constraint on 'TagInfobox', fields ['revision_id', 'tag', 'name'] - db.create_unique('hdabo_taginfobox', ['revision_id', 'tag_id', 'name']) - - - def backwards(self, orm): - - # Removing unique constraint on 'TagInfobox', fields ['revision_id', 'tag', 'name'] - db.delete_unique('hdabo_taginfobox', ['revision_id', 'tag_id', 'name']) - - # Removing unique constraint on 'InfoboxParameter', fields ['param_name', 'tag_infobox'] - db.delete_unique('hdabo_infoboxparameter', ['param_name', 'tag_infobox_id']) - - # Removing unique constraint on 'TagWpCategory', fields ['wp_category', 'hidden', 'tag'] - db.delete_unique('hdabo_tagwpcategory', ['wp_category_id', 'hidden', 'tag_id']) - - - models = { - 'auth.group': { - 'Meta': {'object_name': 'Group'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}), - 'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}) - }, - 'auth.permission': { - 'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'}, - 'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}), - 'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': 
"orm['contenttypes.ContentType']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'name': ('django.db.models.fields.CharField', [], {'max_length': '50'}) - }, - 'auth.user': { - 'Meta': {'object_name': 'User'}, - 'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), - 'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}), - 'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), - 'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}), - 'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), - 'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), - 'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), - 'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), - 'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}), - 'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}), - 'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'}) - }, - 'contenttypes.contenttype': { - 'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"}, - 'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}), - 'name': 
('django.db.models.fields.CharField', [], {'max_length': '100'}) - }, - 'hdabo.author': { - 'Meta': {'object_name': 'Author'}, - 'firstname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}), - 'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'lastname': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}) - }, - 'hdabo.datasheet': { - 'Meta': {'object_name': 'Datasheet'}, - 'author': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Author']", 'null': 'True', 'blank': 'True'}), - 'college_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_college_periods']", 'to': "orm['hdabo.TimePeriod']"}), - 'college_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'college_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_college_themes']", 'to': "orm['hdabo.Domain']"}), - 'description': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), - 'domains': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_domains']", 'to': "orm['hdabo.Domain']"}), - 'format': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.DocumentFormat']", 'null': 'True', 'blank': 'True'}), - 'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}), - 'highschool_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_highschool_periods']", 'to': "orm['hdabo.TimePeriod']"}), - 'highschool_themes': 
('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'highschool_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_highschool_themes']", 'to': "orm['hdabo.Domain']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'manual_order': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), - 'modification_datetime': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}), - 'organisation': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Organisation']"}), - 'original_creation_date': ('django.db.models.fields.DateField', [], {}), - 'original_modification_date': ('django.db.models.fields.DateField', [], {}), - 'primary_periods': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_periods_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_primary_periods']", 'to': "orm['hdabo.TimePeriod']"}), - 'primary_themes': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'primary_themes_datasheets'", 'symmetrical': 'False', 'through': "orm['hdabo.Datasheet_primary_themes']", 'to': "orm['hdabo.Domain']"}), - 'tags': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['hdabo.Tag']", 'through': "orm['hdabo.TaggedSheet']", 'symmetrical': 'False'}), - 'title': ('django.db.models.fields.CharField', [], {'max_length': '2048'}), - 'town': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Location']", 'null': 'True', 'blank': 'True'}), - 'url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), - 'validated': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}), - 'validation_date': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), - 'validator': ('django.db.models.fields.related.ForeignKey', [], {'to': 
"orm['auth.User']", 'null': 'True', 'blank': 'True'}) - }, - 'hdabo.datasheet_college_periods': { - 'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_periods'}, - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'sort_value': ('django.db.models.fields.IntegerField', [], {}), - 'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"}) - }, - 'hdabo.datasheet_college_themes': { - 'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_college_themes'}, - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'sort_value': ('django.db.models.fields.IntegerField', [], {}) - }, - 'hdabo.datasheet_domains': { - 'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_domains'}, - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'sort_value': ('django.db.models.fields.IntegerField', [], {}) - }, - 'hdabo.datasheet_highschool_periods': { - 'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_highschool_periods'}, - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'sort_value': ('django.db.models.fields.IntegerField', [], {}), - 'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"}) - }, - 'hdabo.datasheet_highschool_themes': { - 'Meta': {'ordering': 
"['sort_value']", 'object_name': 'Datasheet_highschool_themes'}, - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'sort_value': ('django.db.models.fields.IntegerField', [], {}) - }, - 'hdabo.datasheet_primary_periods': { - 'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_periods'}, - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'sort_value': ('django.db.models.fields.IntegerField', [], {}), - 'timeperiod': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TimePeriod']"}) - }, - 'hdabo.datasheet_primary_themes': { - 'Meta': {'ordering': "['sort_value']", 'object_name': 'Datasheet_primary_themes'}, - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'domain': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Domain']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'sort_value': ('django.db.models.fields.IntegerField', [], {}) - }, - 'hdabo.documentformat': { - 'Meta': {'object_name': 'DocumentFormat'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}) - }, - 'hdabo.domain': { - 'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'Domain'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}), - 'school_period': ('django.db.models.fields.IntegerField', [], {}) - }, - 'hdabo.infoboxparameter': { - 'Meta': {'unique_together': 
"(('tag_infobox', 'param_name'),)", 'object_name': 'InfoboxParameter'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'param_name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}), - 'param_value': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), - 'tag_infobox': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagInfobox']"}) - }, - 'hdabo.location': { - 'Meta': {'object_name': 'Location'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'insee': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '5'}), - 'name': ('django.db.models.fields.CharField', [], {'max_length': '512'}) - }, - 'hdabo.organisation': { - 'Meta': {'object_name': 'Organisation'}, - 'hda_id': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'location': ('django.db.models.fields.CharField', [], {'max_length': '512', 'null': 'True', 'blank': 'True'}), - 'name': ('django.db.models.fields.CharField', [], {'max_length': '512'}), - 'website': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}) - }, - 'hdabo.tag': { - 'Meta': {'unique_together': "(('label', 'original_label', 'url_status'),)", 'object_name': 'Tag'}, - 'alias': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}), - 'alternative_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}), - 'alternative_wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}), - 'alternative_wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), - 'category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagCategory']", 'null': 
'True', 'blank': 'True'}), - 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), - 'dbpedia_uri': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}), - 'normalized_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}), - 'original_label': ('django.db.models.fields.CharField', [], {'max_length': '1024'}), - 'popularity': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}), - 'url_status': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}), - 'wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}), - 'wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), - 'wp_categories': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['hdabo.WpCategory']", 'through': "orm['hdabo.TagWpCategory']", 'symmetrical': 'False'}) - }, - 'hdabo.tagcategory': { - 'Meta': {'object_name': 'TagCategory'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}) - }, - 'hdabo.taggedsheet': { - 'Meta': {'object_name': 'TaggedSheet'}, - 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), - 'datasheet': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Datasheet']"}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'index_note': ('django.db.models.fields.FloatField', [], {'default': '0.0', 'db_index': 'True'}), - 'order': ('django.db.models.fields.IntegerField', [], {'default': 
'0', 'db_index': 'True'}), - 'original_order': ('django.db.models.fields.IntegerField', [], {'default': '0'}), - 'tag': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Tag']"}), - 'wikipedia_revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}) - }, - 'hdabo.taginfobox': { - 'Meta': {'unique_together': "(('tag', 'name', 'revision_id'),)", 'object_name': 'TagInfobox'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}), - 'revision_id': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}), - 'source': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), - 'tag': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'infoboxes'", 'to': "orm['hdabo.Tag']"}) - }, - 'hdabo.tagwpcategory': { - 'Meta': {'unique_together': "(('tag', 'wp_category', 'hidden'),)", 'object_name': 'TagWpCategory'}, - 'hidden': ('django.db.models.fields.BooleanField', [], {'default': 'False'}), - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'tag': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.Tag']"}), - 'wp_category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.WpCategory']"}) - }, - 'hdabo.timeperiod': { - 'Meta': {'unique_together': "(('label', 'school_period'),)", 'object_name': 'TimePeriod'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'label': ('django.db.models.fields.CharField', [], {'max_length': '512'}), - 'school_period': ('django.db.models.fields.IntegerField', [], {}) - }, - 'hdabo.wpcategory': { - 'Meta': {'object_name': 'WpCategory'}, - 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), - 'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '2048'}) - } - } - - complete_apps = 
['hdabo'] diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdabo/models.py --- a/web/hdabo/models.py Mon Jan 30 18:20:21 2012 +0100 +++ b/web/hdabo/models.py Mon Jan 30 18:20:59 2012 +0100 @@ -90,16 +90,6 @@ class Meta: verbose_name_plural = "TagCategories" -class WpCategory(models.Model): - label = models.CharField(max_length=2048, unique=True, blank=False, null=False) - - def __unicode__(self): - return unicode(self.label) - - class Meta: - verbose_name_plural = "WpCategories" - - class Tag(models.Model): TAG_URL_STATUS_CHOICES = ( (0, "null_result"), @@ -131,7 +121,6 @@ url_status = models.IntegerField(choices=TAG_URL_STATUS_CHOICES, blank=True, null=True, default=None) dbpedia_uri = models.URLField(verify_exists=False, max_length=2048, blank=True, null=True) popularity = models.IntegerField(blank=False, null=False, default=0, db_index=True) - wp_categories = models.ManyToManyField(WpCategory, through='TagWpCategory') @Property def url_status_text(): #@NoSelf @@ -409,30 +398,5 @@ domain = models.ForeignKey(Domain, db_index=True, null=False, blank=False) -class TagWpCategory(models.Model): - tag = models.ForeignKey(Tag) - wp_category = models.ForeignKey(WpCategory) - hidden = models.BooleanField(blank=False, null=False) - - class Meta: - unique_together = ('tag', 'wp_category', 'hidden') - -class TagInfobox(models.Model): - tag = models.ForeignKey(Tag, related_name="infoboxes") - name = models.CharField(max_length=2048, unique=False, blank=False, null=False) - source = models.TextField(unique=False, blank=True, null=True) - revision_id = models.BigIntegerField(unique=False, blank=True, null=True) - - class Meta: - unique_together = ('tag','name','revision_id') - -class InfoboxParameter(models.Model): - tag_infobox = models.ForeignKey(TagInfobox) - param_name = models.CharField(max_length=2048, unique=False, blank=False, null=False) - param_value = models.TextField(unique=False, blank=True, null=True) - - class Meta: - unique_together = ('tag_infobox','param_name') - \ No 
newline at end of file diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdabo/utils.py --- a/web/hdabo/utils.py Mon Jan 30 18:20:21 2012 +0100 +++ b/web/hdabo/utils.py Mon Jan 30 18:20:59 2012 +0100 @@ -3,6 +3,7 @@ import unicodedata import sys import math +import codecs ### # allow to declare a property as a decorator @@ -348,7 +349,12 @@ def normalize(str): return remove_accents(str).lower().replace(u"œ",u"oe") -def show_progress(current_line, total_line, label, width): +def show_progress(current_line, total_line, label, width, writer=None): + + if writer is None: + writer = sys.stdout + if sys.stdout.encoding is not None: + writer = codecs.getwriter(sys.stdout.encoding)(sys.stdout) percent = (float(current_line) / float(total_line)) * 100.0 @@ -356,9 +362,13 @@ spaces = math.floor(width - marks) loader = u'[' + (u'=' * int(marks)) + (u' ' * int(spaces)) + u']' + + s = u"%s %3d%% %*d/%d - %*s\r" % (loader, percent, len(str(total_line)), current_line, total_line, width, label[:width]) - sys.stdout.write(u"%s %d%% %d/%d - %r\r" % (loader, percent, current_line, total_line, label[:50].rjust(50))) #takes the header into account + writer.write(s) #takes the header into account if percent >= 100: - sys.stdout.write("\n") - sys.stdout.flush() + writer.write("\n") + writer.flush() + + return writer diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdalab/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/web/hdalab/__init__.py Mon Jan 30 18:20:59 2012 +0100 @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- +VERSION = (0, 1, 0, "final", 0) + + +def get_version(): + version = '%s.%s' % (VERSION[0], VERSION[1]) + if VERSION[2]: + version = '%s.%s' % (version, VERSION[2]) + if VERSION[3:] == ('alpha', 0): + version = '%s pre-alpha' % version + else: + if VERSION[3] != 'final': + version = '%s %s %s' % (version, VERSION[3], VERSION[4]) + return version + + +__version__ = get_version() diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdalab/config.py.tmpl --- /dev/null Thu Jan 01 00:00:00 1970 
+0000 +++ b/web/hdalab/config.py.tmpl Mon Jan 30 18:20:59 2012 +0100 @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +''' +Created on Jan 26, 2012 + +@author: ymh +''' +import os + +DEBUG = True +TEMPLATE_DEBUG = DEBUG + +ADMINS = ( + # ('Your Name', 'your_email@example.com'), +) + +MANAGERS = ADMINS + +DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.', # Add 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'. + 'NAME': '', # Or path to database file if using sqlite3. + 'USER': '', # Not used with sqlite3. + 'PASSWORD': '', # Not used with sqlite3. + 'HOST': '', # Set to empty string for localhost. Not used with sqlite3. + 'PORT': '', # Set to empty string for default. Not used with sqlite3. + } +} + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)).rstrip("/") + "/" +BASE_URL = '/~ymh/hdabo/' +WEB_URL = 'http://localhost' + + +# Absolute filesystem path to the directory that will hold user-uploaded files. +# Example: "/home/media/media.lawrence.com/media/" +MEDIA_ROOT = os.path.abspath(BASE_DIR + "../static/media/") + +# URL that handles the media served from MEDIA_ROOT. Make sure to use a +# trailing slash. +# Examples: "http://media.lawrence.com/media/", "http://example.com/media/" +MEDIA_URL = BASE_URL + "static/media/" + +# Absolute path to the directory static files should be collected to. +# Don't put anything in this directory yourself; store your static files +# in apps' "static/" subdirectories and in STATICFILES_DIRS. +# Example: "/home/media/media.lawrence.com/static/" +STATIC_ROOT = os.path.abspath(BASE_DIR + "../static/site/") + +# URL prefix for static files. +# Example: "http://media.lawrence.com/static/" +STATIC_URL = BASE_URL + "static/site/" + +# URL prefix for admin static files -- CSS, JavaScript and images. +# Make sure to use a trailing slash. +# Examples: "http://foo.com/static/admin/", "/static/admin/". 
+ADMIN_MEDIA_PREFIX = STATIC_URL + 'admin/' + diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdalab/manage.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/web/hdalab/manage.py Mon Jan 30 18:20:59 2012 +0100 @@ -0,0 +1,15 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from django.core.management import execute_manager +import imp +try: + imp.find_module('settings') # Assumed to be in the same directory. +except ImportError: + import sys + sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n" % __file__) + sys.exit(1) + +import settings + +if __name__ == "__main__": + execute_manager(settings) diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdalab/management/__init__.py diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdalab/management/commands/__init__.py diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdalab/management/commands/export_tags_csv.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/web/hdalab/management/commands/export_tags_csv.py Mon Jan 30 18:20:59 2012 +0100 @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +''' +Created on Jan 25, 2012 + +@author: ymh +''' + +from django.core.management.base import BaseCommand, CommandError +from django.db.models import Q +from hdabo import utils +from hdabo.models import Tag +from optparse import make_option +import csv +import cStringIO +import codecs + +class UnicodeWriter: + """ + A CSV writer which will write rows to CSV file "f", + which is encoded in the given encoding. + """ + + def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): + # Redirect output to a queue + self.queue = cStringIO.StringIO() + self.writer = csv.writer(self.queue, dialect=dialect, **kwds) + self.stream = f + self.encoder = codecs.getincrementalencoder(encoding)() + + def writerow(self, row): + self.writer.writerow([s.encode("utf-8") for s in row]) + # Fetch UTF-8 output from the queue ... 
+ data = self.queue.getvalue() + data = data.decode("utf-8") + # ... and reencode it into the target encoding + data = self.encoder.encode(data) + # write to the target stream + self.stream.write(data) + # empty queue + self.queue.truncate(0) + + def writerows(self, rows): + for row in rows: + self.writerow(row) + +class Command(BaseCommand): + ''' + Command to export tags + ''' + args = '' + options = '[-c|--category filter by category name] [-e|--encoding csv file encoding]' + help = """export csv files for hdabo +Options: + -c, --category : filter by category + --lines : max number of lines to load (for each file). 0 means all. + --encoding : files encoding. default to latin-1""" + + option_list = BaseCommand.option_list + ( + make_option("-c","--category", + action='append', + type='string', + dest='categories', + default=[], + help='filter tag by category (or)'), + make_option("-e","--encoding", + action='store', + type='string', + dest='encoding', + default="utf-8", + help='file encoding, default utf-8'), + make_option("-f","--force", + action='store_true', + dest='force', + default=False, + help='force file overwrite'), + ) + + def handle(self, *args, **options): + + if len(args) == 0 or not args[0]: + raise CommandError("Gives at last one csv file to export") + + self.encoding = options.get("encoding", "utf-8") + self.categories = options.get("categories", []) + self.force = options.get("force", False) + self.base_path = args[0].strip() + self.interactive = options.get("interactive",True) + + files_path = { + "visible" : { 'path':self.base_path + "_visible.txt",}, + "hidden" : { 'path':self.base_path + "_hidden.txt",}, + "infobox" : { 'path':self.base_path + "_infobox.txt",}, + } + + try: + for filedef in files_path.values(): + try: + filedef['file'] = open(filedef['path'],'r') + if (not self.force) and self.interactive: + print filedef['path'] + resp = raw_input("export file already exists. override ? 
type yes to continue : ") + if resp is not None and (resp.lower() == "yes" or resp.lower() == "y"): + self.force = True + # clear file + else: + return "error" + elif not self.interactive and not self.force: + print "Export file %s already exists. Exit." % (filedef['path']) + return "error" + + filedef['file'].close() + filedef['file'] = open(filedef['path'],'w') + except IOError: + filedef['file'] = open(filedef['path'],'w') + + filedef['csv'] = UnicodeWriter(filedef['file'], doublequote=False, escapechar="\\", encoding=self.encoding) + + queryset = Tag.objects.exclude(wikipedia_pageid= None) + cat_filter = None + for cat in self.categories: + if cat_filter is None: + cat_filter = Q(category__label = cat) + else: + cat_filter = cat_filter | Q(category__label = cat) + if cat_filter is not None: + queryset = queryset.filter(cat_filter) + + tcount = queryset.count() + + print "Exporting %d tags" % (tcount) + writer = None + + for i,t in enumerate(queryset.order_by("label")): + + writer = utils.show_progress(i+1, tcount, t.label, 50, writer) + #normal category + row = [t.label,] + [cat.wp_category.label for cat in t.wp_categories.filter(hidden=False)] + files_path['visible']['csv'].writerow(row) + + #hidden category + row = [t.label,] + [cat.wp_category.label for cat in t.wp_categories.filter(hidden=True)] + files_path['hidden']['csv'].writerow(row) + + #infobox + for i in t.infoboxes.all(): + vec = [[p.param_name,p.param_value.replace('\n',"\\n")] for p in i.infoboxparameter_set.all()] + ib_params = [num for elem in vec for num in elem] + row = [t.label, i.name.strip()] + ib_params + files_path['infobox']['csv'].writerow(row) + + finally: + for filedef in files_path.itervalues(): + if filedef.get('file',None): + filedef['file'].close() + + + + \ No newline at end of file diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdalab/management/commands/query_wikipedia_category.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ 
b/web/hdalab/management/commands/query_wikipedia_category.py Mon Jan 30 18:20:59 2012 +0100 @@ -0,0 +1,396 @@ +# -*- coding: utf-8 -*- +''' +Created on Jun 7, 2011 + +@author: ymh +''' + +from django.conf import settings +from django.core.management.base import NoArgsCommand +from django.core.management.color import no_style +from hdabo.models import Tag +from hdalab.models import WpCategory, TagWpCategory, TagInfobox, InfoboxParameter +from optparse import make_option +from wikitools import api,wiki +import sys +import re +import itertools +from hdabo import utils +from django.db.models import Count +from django.db import transaction + + +TYPES_MASK_DICT = { + u'visible': 0b001, + u'hidden': 0b010, + u'infobox': 0b100, + u'all': 0b111, + } + +START_PATTERN = re.compile(u"\{\{\s?Infobox\s+([^|]+)", re.M|re.U|re.I) +END_PATTERN = re.compile(u"\{\{|\}\}", re.M|re.U) +SPLIT_PATTERN = re.compile("\s*?\|\s*([\w]+[^=|]*)\s*=", re.U|re.M) +DELIMITER_PATTERN = re.compile("\{{2,3}|\}{2,3}|\[\[|\]\]|\[|\]") +COMMENT_PATTERN = re.compile("",re.U|re.M) + + + +class Command(NoArgsCommand): + ''' + query and update wikipedia for tag title. 
+ ''' + options = '' + help = """query and update wikipedia for tag title.""" + + option_list = NoArgsCommand.option_list + ( + make_option('--all', + action='store_true', + dest='all', + default=False, + help='force all tags to be updated, not only those not yet processed'), + make_option('--force', + action='store_true', + dest='force', + default=False, + help='ask no questions'), + make_option('--random', + action='store_true', + dest='random', + default=False, + help='randomize query on tags'), + make_option('--site', + action='store', + type='string', + dest='site_url', + default="http://fr.wikipedia.org/w/api.php", + help='the url for the wikipedia site'), + make_option('--limit', + action='store', + type='int', + dest='limit', + default= -1, + help='number of tag to process'), + make_option('--start', + action='store', + type='int', + dest='start', + default=0, + help='number of tag to ignore'), + make_option('--type', + action='append', + dest='types', + type='choice', + choices=['visible','hidden', 'infobox', 'all'], + default=[], + help='what type of query to perform : visible : visible categories, hidden : hidden categories, infobox: infoboxes, all: all of them. 
This option can be assed multiple times'), + make_option('--use-label', + action='store_true', + dest='use_label', + default=False, + help='use label instead of pageid to query wikipedia'), + make_option('--tag', + action='append', + dest='tags', + type='string', + default=[], + help='the tag to query'), + + ) + + +# def process_wp_response(self, label, response): +# +# query_dict = response['query'] +# # get page if multiple pages or none -> return Tag.null_result +# pages = query_dict.get("pages", {}) +# if len(pages) > 1 or len(pages) == 0: +# return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None +# +# page = pages.values()[0] +# +# if u"invalid" in page or u"missing" in page: +# return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None +# +# url = page.get(u'fullurl', None) +# pageid = page.get(u'pageid', None) +# new_label = page[u'title'] +# +# if self.__is_homonymie(page): +# status = Tag.TAG_URL_STATUS_DICT["homonyme"] +# elif u"redirect" in page: +# status = Tag.TAG_URL_STATUS_DICT["redirection"] +# else: +# status = Tag.TAG_URL_STATUS_DICT["match"] +# +# return new_label, status, url, pageid + + def query_all_categories(self, hidden, site, pageid, use_label): + + clshow = 'hidden' if hidden else '!hidden' + params = {'action':'query', 'titles' if use_label else 'pageids': pageid, 'prop':'categories', 'clshow': clshow} + + clcontinue = "" + res = [] + + while clcontinue is not None: + if clcontinue: + params['clcontinue'] = clcontinue + + wpquery = api.APIRequest(site, params) #@UndefinedVariable + response = wpquery.query() + + if self.verbosity > 1: + print "Query infoboxes : " + repr(wpquery.request.get_full_url()+"?"+wpquery.request.get_data()) + print repr(response) + + + query_dict = response.get('query', None) + + if query_dict is None: + return res + + pages = query_dict.get("pages", {}) + if len(pages) > 1 or len(pages) == 0: + return res + + page = pages.values()[0] + + for cat in page.get('categories',[]): + title = 
cat.get('title',"") + title = title[title.find(":")+1:] + if title and clcontinue != ("%s|%s" % (pageid,title)): + res.append(title) + + clcontinue = response.get('query-continue', {}).get('categories',{}).get('clcontinue', None) + + if self.verbosity > 1: + print "Query infoboxes RES: " + print repr(res) + + return res + + def process_categories(self, cat_list, hidden, tag): + + for cat in cat_list: + wp_cat,created = WpCategory.objects.get_or_create(label=cat) #@UnusedVariable + TagWpCategory.objects.get_or_create(tag=tag, wp_category=wp_cat, hidden=hidden) + + + def query_infoboxes(self, site, pageid, use_label): + + res = [] + params = {'action':'query', 'titles' if use_label else 'pageids': pageid, 'prop':'revisions', 'rvprop': 'ids|content'} + wpquery = api.APIRequest(site, params) #@UndefinedVariable + response = wpquery.query() + + query_dict = response.get('query', None) + + if query_dict is None: + return res + + pages = query_dict.get("pages", {}) + if len(pages) > 1 or len(pages) == 0: + return res + + page = pages.values()[0] + + if 'revisions' not in page or not page['revisions']: + return res + + rev = page['revisions'][0] + + content = rev['*'] + + start = 0 + depth = 0 + current_infobox_name = None + current_start = 0 + + while start <= len(content): + if depth==0: + resm = START_PATTERN.search(content[start:]) + if resm is None: + break + depth = 1 + current_start = resm.start()+start + start += resm.end()+1 + current_infobox_name = resm.group(1) + else: + resm = END_PATTERN.search(content[start:]) + if resm is None: + break + if resm.group(0) == "{{": + depth += 1 + elif resm.group(0) == "}}": + depth -= 1 + if depth == 0: + res.append((content[current_start:resm.end()+start], current_infobox_name)) + start += resm.end()+1 + + return_val = (rev['revid'],res) + + if self.verbosity > 1: + print "Query infoboxes url: " + repr(wpquery.request.get_full_url()+"?"+wpquery.request.get_data()) + print repr(return_val) + + return return_val + + def 
split_infoboxes(self, src): + + start = 0 + previous_end = 0 + split_indexes = [] + delimiter_stack = [] + while start<=len(src): + resd = DELIMITER_PATTERN.search(src[start:]) + ress = SPLIT_PATTERN.search(src[start:]) if len(delimiter_stack) == 0 else None + startd = resd.start() if resd is not None else sys.maxint + starts = ress.start() if ress is not None else sys.maxint + if starts < startd: + if len(split_indexes)>0: + split_indexes.append((previous_end, ress.start(0)+start)) + split_indexes.append((ress.start(1)+start, ress.end(1)+start)) + start += ress.end(0) + previous_end = start + elif startd < sys.maxint: + if resd.group().startswith("{") or resd.group().startswith("[") : + delimiter_stack.append(resd.group()) + elif len(delimiter_stack)>0 and ( (delimiter_stack[-1].startswith('{') and resd.group()[0] == '}') or (delimiter_stack[-1].startswith('[') and resd.group()[0] == ']') ) and len(delimiter_stack[-1]) == len(resd.group()): + delimiter_stack.pop() + start += resd.end() + else: + break + + if previous_end > 0: + split_indexes.append((previous_end,len(src))) + res = [src[start:end] for start,end in split_indexes] + return res + + + + def process_infoboxes(self, infobox_defs, tag): + + if not infobox_defs: + return + + revision_id = infobox_defs[0] + for infobox in infobox_defs[1]: + src = infobox[0].strip(' \t\n\r') + name = infobox[1] + tag_infobox, created = TagInfobox.objects.get_or_create(tag=tag, name=name, revision_id = revision_id, defaults={'source': src}) + if not created: + tag_infobox.source = src + tag_infobox.save() + + src = COMMENT_PATTERN.sub('',src) + src = START_PATTERN.sub('',src[:-2]).strip() + + keyvalues = self.split_infoboxes(src) + + for key,value in itertools.izip(*[itertools.islice(keyvalues, i, None, 2) for i in range(2)]): + param, created = InfoboxParameter.objects.get_or_create(tag_infobox=tag_infobox, param_name=key.strip(), defaults={'param_value':value.strip()}) + if not created: + param.param_value = value.strip() + 
param.save() + + def handle_noargs(self, **options): + + self.style = no_style() + + interactive = options.get('interactive', True) + + self.verbosity = int(options.get('verbosity', '1')) + use_label = options.get('use_label', False) + + force = options.get('force', False) + + limit = options.get("limit", -1) + start = options.get("start", 0) + + site_url = options.get('site_url', settings.WIKIPEDIA_API_URL) + + random = options.get('random', False) + + types_mask = 0 + types_list = options.get('types', []) + + if len(types_list) == 0: + types_mask = TYPES_MASK_DICT['all'] + else: + for t in types_list: + types_mask |= TYPES_MASK_DICT[t] + + if self.verbosity > 1 : + print "types mask %s " % (bin(types_mask)) + + if self.verbosity > 2: + print "option passed : " + repr(options) + + + queryset = Tag.objects.exclude(wikipedia_pageid= None) + + tag_list = options.get("tags", []); + + if tag_list: + queryset = queryset.filter(label__in=tag_list) + elif not options.get('all',False): + queryset = queryset.annotate(wpc=Count('wp_categories')).filter(wpc = 0) + #else: + # queryset = Tag.objects.filter(url_status=None) + + if random: + queryset = queryset.order_by("?") + else: + queryset = queryset.order_by("label") + + if limit >= 0: + queryset = queryset[start:limit] + elif start > 0: + queryset = queryset[start:] + + if self.verbosity > 2 : + print "Tag Query is %s" % (queryset.query) + + site = wiki.Wiki(site_url) #@UndefinedVariable + + + count = queryset.count() + if self.verbosity > 1: + print "Processing %d tags" % (count) + + if not force and interactive: + confirm = raw_input("You have requested to query and replace the wikipedia information for %d tags.\n Are you sure you want to do this? 
\nType 'yes' to continue, or 'no' to cancel: " % (count)) + else: + confirm = 'yes' + + if confirm != "yes": + print "wikipedia query cancelled" + return + + + + for i, tag in enumerate(queryset): + + if self.verbosity > 1: + print "processing tag %s (%d/%d)" % (tag.label, i + 1, count) + else: + utils.show_progress(i + 1, count, tag.label, 60) + + # query categories + wikipedia_pageid = tag.label if use_label else tag.wikipedia_pageid + if tag.url_status == Tag.TAG_URL_STATUS_DICT['redirection'] and tag.alternative_wikipedia_pageid is not None : + wikipedia_pageid = tag.alternative_label if use_label else tag.alternative_wikipedia_pageid + + with transaction.commit_on_success(): + if types_mask & TYPES_MASK_DICT['visible']: + res = self.query_all_categories(False, site, wikipedia_pageid, use_label) + self.process_categories(res, False, tag) + + if types_mask & TYPES_MASK_DICT['hidden']: + res = self.query_all_categories(True, site, wikipedia_pageid, use_label) + self.process_categories(res, True, tag) + + if types_mask & TYPES_MASK_DICT['infobox']: + res = self.query_infoboxes(site, wikipedia_pageid, use_label) + self.process_infoboxes(res, tag) + diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdalab/migrations/0001_initial.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/web/hdalab/migrations/0001_initial.py Mon Jan 30 18:20:59 2012 +0100 @@ -0,0 +1,131 @@ +# encoding: utf-8 +from south.db import db +from south.v2 import SchemaMigration + +class Migration(SchemaMigration): + + def forwards(self, orm): + + # Adding model 'WpCategory' + db.create_table('hdalab_wpcategory', ( #@UndefinedVariable + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('label', self.gf('django.db.models.fields.CharField')(unique=True, max_length=2048)), + )) + db.send_create_signal('hdalab', ['WpCategory']) #@UndefinedVariable + + # Adding model 'TagWpCategory' + db.create_table('hdalab_tagwpcategory', ( #@UndefinedVariable + ('id', 
self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('tag', self.gf('django.db.models.fields.related.ForeignKey')(related_name='wp_categories', to=orm['hdabo.Tag'])), + ('wp_category', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['hdalab.WpCategory'])), + ('hidden', self.gf('django.db.models.fields.BooleanField')(default=False)), + )) + db.send_create_signal('hdalab', ['TagWpCategory']) #@UndefinedVariable + + # Adding unique constraint on 'TagWpCategory', fields ['tag', 'wp_category', 'hidden'] + db.create_unique('hdalab_tagwpcategory', ['tag_id', 'wp_category_id', 'hidden']) #@UndefinedVariable + + # Adding model 'TagInfobox' + db.create_table('hdalab_taginfobox', ( #@UndefinedVariable + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('tag', self.gf('django.db.models.fields.related.ForeignKey')(related_name='infoboxes', to=orm['hdabo.Tag'])), + ('name', self.gf('django.db.models.fields.CharField')(max_length=2048)), + ('source', self.gf('django.db.models.fields.TextField')(null=True, blank=True)), + ('revision_id', self.gf('django.db.models.fields.BigIntegerField')(null=True, blank=True)), + )) + db.send_create_signal('hdalab', ['TagInfobox']) #@UndefinedVariable + + # Adding unique constraint on 'TagInfobox', fields ['tag', 'name', 'revision_id'] + db.create_unique('hdalab_taginfobox', ['tag_id', 'name', 'revision_id']) #@UndefinedVariable + + # Adding model 'InfoboxParameter' + db.create_table('hdalab_infoboxparameter', ( #@UndefinedVariable + ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)), + ('tag_infobox', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['hdalab.TagInfobox'])), + ('param_name', self.gf('django.db.models.fields.CharField')(max_length=2048)), + ('param_value', self.gf('django.db.models.fields.TextField')(null=True, blank=True)), + )) + db.send_create_signal('hdalab', ['InfoboxParameter']) #@UndefinedVariable + + # Adding unique constraint on 
'InfoboxParameter', fields ['tag_infobox', 'param_name'] + db.create_unique('hdalab_infoboxparameter', ['tag_infobox_id', 'param_name']) #@UndefinedVariable + + + def backwards(self, orm): + + # Removing unique constraint on 'InfoboxParameter', fields ['tag_infobox', 'param_name'] + db.delete_unique('hdalab_infoboxparameter', ['tag_infobox_id', 'param_name']) #@UndefinedVariable + + # Removing unique constraint on 'TagInfobox', fields ['tag', 'name', 'revision_id'] + db.delete_unique('hdalab_taginfobox', ['tag_id', 'name', 'revision_id']) #@UndefinedVariable + + # Removing unique constraint on 'TagWpCategory', fields ['tag', 'wp_category', 'hidden'] + db.delete_unique('hdalab_tagwpcategory', ['tag_id', 'wp_category_id', 'hidden']) #@UndefinedVariable + + # Deleting model 'WpCategory' + db.delete_table('hdalab_wpcategory') #@UndefinedVariable + + # Deleting model 'TagWpCategory' + db.delete_table('hdalab_tagwpcategory') #@UndefinedVariable + + # Deleting model 'TagInfobox' + db.delete_table('hdalab_taginfobox') #@UndefinedVariable + + # Deleting model 'InfoboxParameter' + db.delete_table('hdalab_infoboxparameter') #@UndefinedVariable + + + models = { + 'hdabo.tag': { + 'Meta': {'unique_together': "(('label', 'original_label', 'url_status'),)", 'object_name': 'Tag'}, + 'alias': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}), + 'alternative_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'null': 'True', 'blank': 'True'}), + 'alternative_wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}), + 'alternative_wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), + 'category': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdabo.TagCategory']", 'null': 'True', 'blank': 'True'}), + 'created_at': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 
'True'}), + 'dbpedia_uri': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}), + 'normalized_label': ('django.db.models.fields.CharField', [], {'max_length': '1024', 'db_index': 'True'}), + 'original_label': ('django.db.models.fields.CharField', [], {'max_length': '1024'}), + 'popularity': ('django.db.models.fields.IntegerField', [], {'default': '0', 'db_index': 'True'}), + 'url_status': ('django.db.models.fields.IntegerField', [], {'default': 'None', 'null': 'True', 'blank': 'True'}), + 'wikipedia_pageid': ('django.db.models.fields.BigIntegerField', [], {'null': 'True', 'blank': 'True'}), + 'wikipedia_url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}) + }, + 'hdabo.tagcategory': { + 'Meta': {'object_name': 'TagCategory'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'label': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '512'}) + }, + 'hdalab.infoboxparameter': { + 'Meta': {'unique_together': "(('tag_infobox', 'param_name'),)", 'object_name': 'InfoboxParameter'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'param_name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}), + 'param_value': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), + 'tag_infobox': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['hdalab.TagInfobox']"}) + }, + 'hdalab.taginfobox': { + 'Meta': {'unique_together': "(('tag', 'name', 'revision_id'),)", 'object_name': 'TagInfobox'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '2048'}), + 'revision_id': 
class WpCategory(models.Model):
    '''
    A wikipedia category, identified by its unique label.
    '''
    label = models.CharField(max_length=2048, unique=True, blank=False, null=False)

    def __unicode__(self):
        return unicode(self.label)

    class Meta:
        # NOTE(review): app_label is presumably set explicitly because the
        # model is declared in the hdalab.models package and not in a
        # plain models.py module - confirm.
        app_label = 'hdalab'
        verbose_name_plural = "WpCategories"

class TagWpCategory(models.Model):
    '''
    Link between a Tag and a WpCategory.
    The links of a tag are reachable through tag.wp_categories.
    '''
    tag = models.ForeignKey(Tag, related_name="wp_categories")
    wp_category = models.ForeignKey(WpCategory)
    # the query_wikipedia_category command sets this to True for the
    # categories returned by its "hidden" query, False for the others
    hidden = models.BooleanField(blank=False, null=False)

    class Meta:
        app_label = 'hdalab'
        # hidden is part of the key : the same category can be linked to
        # a tag once as visible and once as hidden
        unique_together = ('tag', 'wp_category', 'hidden')

class TagInfobox(models.Model):
    '''
    An infobox attached to a Tag.
    The infoboxes of a tag are reachable through tag.infoboxes.
    '''
    tag = models.ForeignKey(Tag, related_name="infoboxes")
    # name of the infobox
    name = models.CharField(max_length=2048, unique=False, blank=False, null=False)
    # text of the infobox; the query_wikipedia_category command stores the
    # extracted infobox source here
    source = models.TextField(unique=False, blank=True, null=True)
    # the query_wikipedia_category command stores the "revid" of the
    # queried page revision here
    revision_id = models.BigIntegerField(unique=False, blank=True, null=True)

    class Meta:
        app_label = 'hdalab'
        unique_together = ('tag','name','revision_id')

class InfoboxParameter(models.Model):
    '''
    One name / value parameter of a TagInfobox.
    '''
    tag_infobox = models.ForeignKey(TagInfobox)
    param_name = models.CharField(max_length=2048, unique=False, blank=False, null=False)
    param_value = models.TextField(unique=False, blank=True, null=True)

    class Meta:
        app_label = 'hdalab'
        # a parameter name appears at most once per infobox
        unique_together = ('tag_infobox','param_name')
+ } +} + +# Local time zone for this installation. Choices can be found here: +# http://en.wikipedia.org/wiki/List_of_tz_zones_by_name +# although not all choices may be available on all operating systems. +# On Unix systems, a value of None will cause Django to use the same +# timezone as the operating system. +# If running in a Windows environment this must be set to the same as your +# system time zone. +TIME_ZONE = 'America/Chicago' + +# Language code for this installation. All choices can be found here: +# http://www.i18nguy.com/unicode/language-identifiers.html +LANGUAGE_CODE = 'en-us' + +SITE_ID = 1 + +# If you set this to False, Django will make some optimizations so as not +# to load the internationalization machinery. +USE_I18N = True + +# If you set this to False, Django will not format dates, numbers and +# calendars according to the current locale +USE_L10N = True + +# Absolute filesystem path to the directory that will hold user-uploaded files. +# Example: "/home/media/media.lawrence.com/media/" +MEDIA_ROOT = '' + +# URL that handles the media served from MEDIA_ROOT. Make sure to use a +# trailing slash. +# Examples: "http://media.lawrence.com/media/", "http://example.com/media/" +MEDIA_URL = '' + +# Absolute path to the directory static files should be collected to. +# Don't put anything in this directory yourself; store your static files +# in apps' "static/" subdirectories and in STATICFILES_DIRS. +# Example: "/home/media/media.lawrence.com/static/" +STATIC_ROOT = '' + +# URL prefix for static files. +# Example: "http://media.lawrence.com/static/" +STATIC_URL = '/static/' + +# URL prefix for admin static files -- CSS, JavaScript and images. +# Make sure to use a trailing slash. +# Examples: "http://foo.com/static/admin/", "/static/admin/". +ADMIN_MEDIA_PREFIX = '/static/admin/' + +# Additional locations of static files +STATICFILES_DIRS = ( + # Put strings here, like "/home/html/static" or "C:/www/django/static". 
+ # Always use forward slashes, even on Windows. + # Don't forget to use absolute paths, not relative paths. +) + +# List of finder classes that know how to find static files in +# various locations. +STATICFILES_FINDERS = ( + 'django.contrib.staticfiles.finders.FileSystemFinder', + 'django.contrib.staticfiles.finders.AppDirectoriesFinder', +# 'django.contrib.staticfiles.finders.DefaultStorageFinder', +) + +# Make this unique, and don't share it with anybody. +SECRET_KEY = 'u!@fo&-)d-hqz7==jmc2*_^__wod8$k^lb7^)y@ihbok)gn4fe' + +# List of callables that know how to import templates from various sources. +TEMPLATE_LOADERS = ( + 'django.template.loaders.filesystem.Loader', + 'django.template.loaders.app_directories.Loader', +# 'django.template.loaders.eggs.Loader', +) + +MIDDLEWARE_CLASSES = ( + 'django.middleware.common.CommonMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', +) + +ROOT_URLCONF = 'hdalab.urls' + +TEMPLATE_DIRS = ( + # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates". + # Always use forward slashes, even on Windows. + # Don't forget to use absolute paths, not relative paths. +) + +INSTALLED_APPS = ( + 'south', + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.sites', + 'django.contrib.messages', + 'django.contrib.staticfiles', + 'django.contrib.admin', + 'django_extensions', + 'hdabo', + 'hdalab' +) + +# A sample logging configuration. The only tangible logging +# performed by this configuration is to send an email to +# the site admins on every HTTP 500 error. +# See http://docs.djangoproject.com/en/dev/topics/logging for +# more details on how to customize your logging configuration. 
+#LOGGING = { +# 'version': 1, +# 'disable_existing_loggers': False, +# 'handlers': { +# 'mail_admins': { +# 'level': 'ERROR', +# 'class': 'django.utils.log.AdminEmailHandler' +# } +# }, +# 'loggers': { +# 'django.request': { +# 'handlers': ['mail_admins'], +# 'level': 'ERROR', +# 'propagate': True, +# }, +# } +#} + +HAYSTACK_SITECONF = 'hdabo.search.sites' +HAYSTACK_SEARCH_ENGINE = 'hdabo.search.french_whoosh' +#HAYSTACK_WHOOSH_PATH = os.path.abspath(BASE_DIR + "../index/").rstrip("/") + "/" + +WIKIPEDIA_API_URL = "http://fr.wikipedia.org/w/api.php" +WIKIPEDIA_VERSION_PERMALINK_TEMPLATE = "http://fr.wikipedia.org/w/index.php?oldid=%s" +DBPEDIA_URI_TEMPLATE = "http://dbpedia.org/resource/%s" + + + +from hdalab.config import * #@UnusedWildImport + diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdalab/urls.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/web/hdalab/urls.py Mon Jan 30 18:20:59 2012 +0100 @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +from django.conf.urls.defaults import patterns, include, url +from django.contrib import admin + +# Uncomment the next two lines to enable the admin: +# from django.contrib import admin +# admin.autodiscover() + +urlpatterns = patterns('', + # Examples: + # url(r'^$', 'hdalab.views.home', name='home'), + # url(r'^hdalab/', include('hdalab.foo.urls')), + + # Uncomment the admin/doc line below to enable admin documentation: + # url(r'^admin/doc/', include('django.contrib.admindocs.urls')), + + # Uncomment the next line to enable the admin: + url(r'^admin/', include(admin.site.urls)), + + +) diff -r 46c0f7a935d1 -r 0f9cc90c49fa web/hdalab/views/__init__.py