# HG changeset patch # User ymh # Date 1361467919 -3600 # Node ID eea37d86feea69251ab13b0b95fdc1ce2c76d27f # Parent d842aad55f812235df98270ec6bc56362d448f7b import pertimm video + first version of sync script diff -r d842aad55f81 -r eea37d86feea .hgignore --- a/.hgignore Wed Feb 20 16:56:45 2013 +0100 +++ b/.hgignore Thu Feb 21 18:31:59 2013 +0100 @@ -12,4 +12,5 @@ ^web/static/media ^src/log\.txt$ ^run/log/ -^log$ \ No newline at end of file +^log$ +^sbin/sync/config\.py$ \ No newline at end of file diff -r d842aad55f81 -r eea37d86feea sbin/sync/config.py.tmpl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sbin/sync/config.py.tmpl Thu Feb 21 18:31:59 2013 +0100 @@ -0,0 +1,64 @@ +from fabric.api import env +from random import choice + +env.hosts = ['iri@web.iri.centrepompidou.fr'] + +env.web_group = 'www-data' +env.folders = ['log', 'static/media'] + +env.repos = {'web' : "/Users/ymh/dev/workspace/egonomy"} +env.base_export_path = "~/tmp" +env.export_prefix = "platform" + +env.remote_path = { + 'web':"/iridata/www/ldt/", + 'src':"/Users/ymh/dev/tmp/testfab/src", + 'virtualenv':"/iridata/virtualenv/ldt", + 'ldt_base':"/tmp", + 'venv_export':"/iridata/users/iri/tmp", +} + +env.platform_web_module = "egonomy" +env.remote_baseline_venv = "/iridata/virtualenv/baseline2.7" + +env.rsync_filters = { + 'src' : [ + "P .htpasswd", + "P .htaccess", + "P egonomy/config.py", + ], + 'web': [ + "+ core", + "P .htpasswd", + "P .htaccess", + "P robots.txt", + "P env/***", + "P log/***", + "P index/***", + "P static/media/***", + "P crossdomain.xml", + ], + 'venv': [ + "+ core", + ] +} +env.web_relaunch_cmd = "supervisorctl restart egonomy" + + +env.config = { + 'web': { + 'base_url': "/", + 'web_url': 'http://egonomy.iri-resesarch.org', + 'db_engine':'postgresql_psycopg2', + 'db_name':'platform', + 'db_user': 'iriuser', + 'db_password': '', + 'db_host': 'sql.iri.centrepompidou.fr', + 'db_port': 5432, + 'haystack_url' : 'http://localhost:9200', + 'haystack_index' : 'egonomy', + 
'log_file' : env.remote_path['web'] + '/log/log.txt', + 'secret_key' : ''.join([choice('abcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*(-_=+)') for i in range(50)]), + }, +} + diff -r d842aad55f81 -r eea37d86feea sbin/sync/fabfile.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sbin/sync/fabfile.py Thu Feb 21 18:31:59 2013 +0100 @@ -0,0 +1,58 @@ +from fablib import (export_version, do_sync_web, create_config, + clean_export_folder, sync_install_build, do_create_virtualenv, + clean_rsync_folder, rsync_export, do_relaunch_server) +from fabric.api import task, env, sudo +from fabric.colors import green +import imp +import os.path + +@task +def relaunch_server(do_collectstatic=True, do_syncdb=True): + print("Relaunch server") + do_relaunch_server(do_collectstatic, do_syncdb) + +@task +def sync_web(version): + print(green("sync web with version %s" % version)) + export_path = export_version(web=version) + export_path_full = os.path.join(export_path,'web') + do_sync_web(version, export_path_full) + create_config(export_path_full) + clean_export_folder(export_path) + relaunch_server() + +@task +def update_lib(version, package): + print(green("update ldt with version %s" % version)) + export_path = export_version(web=version) + export_path_full = os.path.join(export_path,'web') + lib_path = os.path.join(export_path_full, "virtualenv", "res", "lib") + + f, pathname, description = imp.find_module("patch", [lib_path]) + imp.load_module("patch", f, pathname, description) + f, pathname, description = imp.find_module("lib_create_env", [lib_path]) + lib_create_env = imp.load_module("lib_create_env", f, pathname, description) + + package_path_full = os.path.join(export_path_full, "virtualenv", "res", "src", lib_create_env.URLS[package]['local']) + + sync_install_build(package_path_full) + clean_export_folder(export_path) + relaunch_server() + + +@task +def create_virtualenv(version): + print(green("create virtualenv with version %s" % version)) + export_path = 
export_version(web=version) + export_path_web = os.path.join(export_path,'web') + venv_remote_export_path = "" + try: + virtualenv_path = os.path.join(export_path_web, "virtualenv") + + venv_remote_export_path = os.path.join(env.remote_path['venv_export'], env.export_prefix, version,"virtualenv") + rsync_export(virtualenv_path, venv_remote_export_path, env.rsync_filters['venv']) + do_create_virtualenv(venv_remote_export_path, env.remote_path['virtualenv']) + finally: + clean_export_folder(export_path) + if venv_remote_export_path: + clean_rsync_folder(venv_remote_export_path) diff -r d842aad55f81 -r eea37d86feea src/egonomy/config.py.tmpl --- a/src/egonomy/config.py.tmpl Wed Feb 20 16:56:45 2013 +0100 +++ b/src/egonomy/config.py.tmpl Thu Feb 21 18:31:59 2013 +0100 @@ -2,29 +2,29 @@ import os, logging BASE_DIR = os.path.dirname(os.path.abspath(__file__)).rstrip("/")+"/" -BASE_URL = '/egonomy/' -WEB_URL = 'http://localhost/' +BASE_URL = '%(base_url)s' +WEB_URL = '%(web_url)s' STATIC_URL = BASE_URL + 'static/site/' DEBUG = True DATABASES = { 'default': { - 'ENGINE': 'django.db.backends.postgresql_psycopg2', # Add 'postgresql_psycopg2', 'mysql', 'sqlite3' or 'oracle'. - 'NAME': 'egonomy', # Or path to database file if using sqlite3. + 'ENGINE': 'django.db.backends.%(db_engine)s', # Add 'postgresql_psycopg2', 'mysql', 'sqlite3' or 'oracle'. + 'NAME': '%(db_name)s', # Or path to database file if using sqlite3. # The following settings are not used with sqlite3: - 'USER': '', - 'PASSWORD': '', - 'HOST': 'localhost', # Empty for localhost through domain sockets or '127.0.0.1' for localhost through TCP. - 'PORT': '5432', # Set to empty string for default. + 'USER': '%(db_user)s', + 'PASSWORD': '%(db_password)s', + 'HOST': '%(db_host)s', # Empty for localhost through domain sockets or '127.0.0.1' for localhost through TCP. + 'PORT': '%(db_port)d', # Set to empty string for default. 
} } HAYSTACK_CONNECTIONS = { 'default': { 'ENGINE': 'egonomy.search_indexes.backends.elasticsearch_backend.ElasticsearchSearchEngine', - 'URL': 'http://127.0.0.1:9200/', - 'INDEX_NAME': 'egonomy', + 'URL': '%(haystack_url)s', + 'INDEX_NAME': '%(haystack_index)s', }, } @@ -32,7 +32,7 @@ # http://en.wikipedia.org/wiki/List_of_tz_zones_by_name # although not all choices may be available on all operating systems. # In a Windows environment this must be set to your system time zone. -TIME_ZONE = 'America/Chicago' +TIME_ZONE = 'Europe/Paris' # Language code for this installation. All choices can be found here: # http://www.i18nguy.com/unicode/language-identifiers.html @@ -64,9 +64,10 @@ # Make this unique, and don't share it with anybody. -SECRET_KEY = 'abc123' +SECRET_KEY = '%(secret_key)s' -LOG_FILE = os.path.abspath(os.path.join(BASE_DIR,"../../run/log/log.txt")) +#LOG_FILE = os.path.abspath(os.path.join(BASE_DIR,"../../run/log/log.txt")) +LOG_FILE = '%(log_file)s' LOG_LEVEL = logging.DEBUG # A sample logging configuration. The only tangible logging # performed by this configuration is to send an email to diff -r d842aad55f81 -r eea37d86feea src/egonomy/management/commands/importPertimm.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/egonomy/management/commands/importPertimm.py Thu Feb 21 18:31:59 2013 +0100 @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +''' +Created on Jan 31, 2013 + +@author: ymh +''' + +from ..utils import show_progress +from django.core.management.base import BaseCommand +from django.db import transaction +from egonomy.models import ImageMetadata +from optparse import make_option +import csv +import sys +import datetime + + +class Command(BaseCommand): + ''' + Import rmn csv files + ''' + + args = 'csv_file csv_file ...'
+ help = 'Import rmn csv files' + + option_list = BaseCommand.option_list + ( + make_option('-n', '--max-lines', + dest= 'max_lines', + type='int', + default= sys.maxint, + help= 'max number of lines to process (default: no limit, the whole file is processed)' + ), + make_option('-b', '--batch-size', + dest= 'batch_size', + type='int', + default= 5000, + help= 'number of object to import in bulk operations' + ), + make_option('-e', '--encoding', + dest= 'encoding', + default= 'utf8', + help= 'csv files encoding' + ), + make_option('--skip', + dest= 'skip', + type='int', + default= 0, + help= 'number of entry to skip' + ), + make_option('--stop', + dest= 'cont', + action= 'store_false', + default= True, + help= 'stop on error' + ), + make_option('-l', '--log', + dest= 'log', + default= 'log.txt', + help= 'log file' + ), + ) + + def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None): + val = dict_arg.get(key, default) + return conv(val) if val else default + + def __safe_decode(self, s): + if not isinstance(s, basestring): + return s + try: + return s.decode('utf8') + except: + try: + return s.decode('latin1') + except: + return s.decode('utf8','replace') + + def handle(self, *args, **options): + + max_lines = options.get('max_lines', sys.maxint) + csv_files_dialect = {} + skip = options.get('skip', 0) + # calculating the number of lines to process + print("calculating number of line to process") + total = 0 + for csv_file_path in args: + with open(csv_file_path,'rb') as csv_file: + dialect = csv.Sniffer().sniff(csv_file.read(1024)) + dialect.doublequote = True + csv_files_dialect[csv_file_path] = dialect + csv_file.seek(0) + for _ in csv.DictReader(csv_file, dialect=dialect): + total += 1 + if total > max_lines: + break + + nb_lines = min(max_lines, total) + batch_size = options.get('batch_size', 5000) + + print("There is %d lines to process, starting processing now."
% nb_lines) + counter = 0 + writer = None + encoding = options.get('encoding', 'utf8') + log_path = options.get('log', "log.txt") + cont_on_error = options.get('cont', True) + + transaction.enter_transaction_management() + transaction.managed() + try: + for csv_file_path in args: + with open(csv_file_path,'rb') as csv_file: + dialect = csv_files_dialect.get(csv_file_path,None) + if not dialect: + dialect = csv.Sniffer().sniff(csv_file.read(1024)) + dialect.doublequote = True + csv_file.seek(0) + + dictreader = csv.DictReader(csv_file, dialect=dialect) + thesaurus_fields = sorted([fname for fname in dictreader.fieldnames if fname.startswith("THESAURUS")], key=lambda name: int(name[len("THESAURUS_"):])) + + for row in dictreader: + try: + counter += 1 + if counter <= skip: + continue + if counter > nb_lines: + break + urow = dict([(k, v.decode(encoding, 'replace') if v else v) for k,v in row.items()]) + writer = show_progress(counter, nb_lines, u"%s - %s - %d/%d" % (urow['CLICHE'], urow['TITRE'], counter%batch_size, batch_size), 80, writer) + + img_id = urow['CLICHE'] + + thesaurus_pertimm = "|".join([urow[fname] for fname in thesaurus_fields if urow[fname]]) + + ImageMetadata.objects.filter(id=img_id).update(titre_pertimm=urow["PERTIMM_TITRE"], description_pertimm=urow["PERTIMM_DESCRIPTION"], thesaurus_pertimm=thesaurus_pertimm) + + except Exception as e: + error_msg = "%s - Error treating line %d, file %s local %d : id %s - title : %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),counter, csv_file_path, dictreader.line_num, row['CLICHE'] if (row and 'CLICHE' in row and row['CLICHE']) else 'n/a', row['TITRE'] if (row and 'TITRE' in row and row['TITRE']) else 'n/a', repr(e) ) + with open(log_path, 'a') as log_file: + log_file.write(error_msg) + if not cont_on_error: + raise + + + if not (counter%batch_size): + transaction.commit() + + + if counter > nb_lines: + break + + transaction.commit() + except: + transaction.rollback() + raise + finally: + 
transaction.leave_transaction_management() + \ No newline at end of file diff -r d842aad55f81 -r eea37d86feea src/egonomy/search_indexes/backends/elasticsearch_backend.py --- a/src/egonomy/search_indexes/backends/elasticsearch_backend.py Wed Feb 20 16:56:45 2013 +0100 +++ b/src/egonomy/search_indexes/backends/elasticsearch_backend.py Thu Feb 21 18:31:59 2013 +0100 @@ -6,7 +6,7 @@ ''' from django.db.models.loading import get_model -from egonomy.models import ImageMetadata, Fragment +from egonomy.models import Fragment from haystack.backends import BaseEngine, SearchResult, elasticsearch_backend from haystack.constants import DJANGO_CT, DJANGO_ID import datetime diff -r d842aad55f81 -r eea37d86feea src/egonomy/search_indexes/indexes.py --- a/src/egonomy/search_indexes/indexes.py Wed Feb 20 16:56:45 2013 +0100 +++ b/src/egonomy/search_indexes/indexes.py Thu Feb 21 18:31:59 2013 +0100 @@ -25,6 +25,10 @@ localization = indexes.CharField(model_attr='localisation', null=True) tags = indexes.CharField(model_attr='mots_cles', null=True) + title_p = indexes.CharField(model_attr="titre_pertimm", null=True) + description_p = indexes.CharField(model_attr="description_pertimm", null=True) + thesaurus_p = indexes.CharField(model_attr="thesaurus_pertimm", null=True) + + def get_model(self): return ImageMetadata diff -r d842aad55f81 -r eea37d86feea src/egonomy/templates/search/indexes/egonomy/imagemetadata_text.txt --- a/src/egonomy/templates/search/indexes/egonomy/imagemetadata_text.txt Wed Feb 20 16:56:45 2013 +0100 +++ b/src/egonomy/templates/search/indexes/egonomy/imagemetadata_text.txt Thu Feb 21 18:31:59 2013 +0100 @@ -6,4 +6,7 @@ {{ object.site }} {{ object.lieu }} {{ object.localisation }} -{{ object.mots_cles }} \ No newline at end of file +{{ object.mots_cles }} +{{ object.titre_pertimm }} +{{ object.description_pertimm }} +{{ object.thesaurus_pertimm }} \ No newline at end of file