import pertimm video + first version of sync script
author ymh <ymh.work@gmail.com>
Thu, 21 Feb 2013 18:31:59 +0100
changeset 52 eea37d86feea
parent 51 d842aad55f81
child 53 59c080c5148f
import pertimm video + first version of sync script
.hgignore
sbin/sync/config.py.tmpl
sbin/sync/fabfile.py
src/egonomy/config.py.tmpl
src/egonomy/management/commands/importPertimm.py
src/egonomy/search_indexes/backends/elasticsearch_backend.py
src/egonomy/search_indexes/indexes.py
src/egonomy/templates/search/indexes/egonomy/imagemetadata_text.txt
--- a/.hgignore	Wed Feb 20 16:56:45 2013 +0100
+++ b/.hgignore	Thu Feb 21 18:31:59 2013 +0100
@@ -12,4 +12,5 @@
 ^web/static/media
 ^src/log\.txt$
 ^run/log/
-^log$
\ No newline at end of file
+^log$
+^sbin/sync/config\.py$
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sbin/sync/config.py.tmpl	Thu Feb 21 18:31:59 2013 +0100
@@ -0,0 +1,64 @@
+from fabric.api import env
+from random import choice
+
+env.hosts = ['iri@web.iri.centrepompidou.fr']
+
+env.web_group = 'www-data'
+env.folders = ['log', 'static/media']
+
+env.repos = {'web' : "/Users/ymh/dev/workspace/egonomy"}
+env.base_export_path = "~/tmp"
+env.export_prefix = "platform"
+
+env.remote_path = {
+    'web':"/iridata/www/ldt/",
+    'src':"/Users/ymh/dev/tmp/testfab/src",
+    'virtualenv':"/iridata/virtualenv/ldt",
+    'ldt_base':"/tmp",
+    'venv_export':"/iridata/users/iri/tmp",
+}
+
+env.platform_web_module = "egonomy"
+env.remote_baseline_venv = "/iridata/virtualenv/baseline2.7"
+
+env.rsync_filters = {
+    'src' : [
+        "P .htpasswd",
+        "P .htaccess",
+        "P egonomy/config.py",
+    ],
+    'web': [
+        "+ core",
+        "P .htpasswd",
+        "P .htaccess",
+        "P robots.txt",
+        "P env/***",
+        "P log/***",
+        "P index/***",
+        "P static/media/***",
+        "P crossdomain.xml",
+    ],
+    'venv': [
+        "+ core",
+    ]
+}
+env.web_relaunch_cmd = "supervisorctl restart egonomy"
+
+
+env.config = {
+    'web': {
+        'base_url': "/",
+        'web_url': 'http://egonomy.iri-research.org',
+        'db_engine':'postgresql_psycopg2',
+        'db_name':'platform',
+        'db_user': 'iriuser',
+        'db_password': '',
+        'db_host': 'sql.iri.centrepompidou.fr',
+        'db_port': 5432,
+        'haystack_url' : 'http://localhost:9200',
+        'haystack_index' : 'egonomy',
+        'log_file' : env.remote_path['web'].rstrip('/') + '/log/log.txt',  # remote_path['web'] ends with '/', avoid '//'
+        'secret_key' : ''.join([choice('abcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*(-_=+)') for i in range(50)]),  # NOTE(review): random.choice is not a CSPRNG; prefer random.SystemRandom().choice
+    },
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sbin/sync/fabfile.py	Thu Feb 21 18:31:59 2013 +0100
@@ -0,0 +1,58 @@
+from fablib import (export_version, do_sync_web, create_config, 
+    clean_export_folder, sync_install_build, do_create_virtualenv, 
+    clean_rsync_folder, rsync_export, do_relaunch_server)
+from fabric.api import task, env, sudo
+from fabric.colors import green
+import imp
+import os.path
+
+@task
+def relaunch_server(do_collectstatic=True, do_syncdb=True):
+    print("Relaunch server")
+    do_relaunch_server(do_collectstatic, do_syncdb)
+
+@task
+def sync_web(version):
+    print(green("sync web with version %s" % version))
+    export_path = export_version(web=version)
+    export_path_full = os.path.join(export_path,'web')
+    do_sync_web(version, export_path_full)
+    create_config(export_path_full)
+    clean_export_folder(export_path)
+    relaunch_server()
+    
+@task
+def update_lib(version, package):
+    print(green("update ldt with version %s" % version))
+    export_path = export_version(web=version)
+    export_path_full = os.path.join(export_path,'web')
+    lib_path = os.path.join(export_path_full, "virtualenv", "res", "lib")
+    
+    f, pathname, description = imp.find_module("patch", [lib_path])
+    imp.load_module("patch", f, pathname, description)
+    f, pathname, description = imp.find_module("lib_create_env", [lib_path])
+    lib_create_env = imp.load_module("lib_create_env", f, pathname, description)
+    
+    package_path_full = os.path.join(export_path_full, "virtualenv", "res", "src", lib_create_env.URLS[package]['local'])
+    
+    sync_install_build(package_path_full)
+    clean_export_folder(export_path)
+    relaunch_server()
+    
+
+@task
+def create_virtualenv(version):
+    print(green("create virtualenv with version %s" % version))
+    export_path = export_version(web=version)
+    export_path_web = os.path.join(export_path,'web')
+    venv_remote_export_path = ""
+    try:
+        virtualenv_path = os.path.join(export_path_web, "virtualenv")
+    
+        venv_remote_export_path = os.path.join(env.remote_path['venv_export'], env.export_prefix, version,"virtualenv")
+        rsync_export(virtualenv_path, venv_remote_export_path, env.rsync_filters['venv'])
+        do_create_virtualenv(venv_remote_export_path, env.remote_path['virtualenv'])
+    finally:
+        clean_export_folder(export_path)
+        if venv_remote_export_path:
+            clean_rsync_folder(venv_remote_export_path)
--- a/src/egonomy/config.py.tmpl	Wed Feb 20 16:56:45 2013 +0100
+++ b/src/egonomy/config.py.tmpl	Thu Feb 21 18:31:59 2013 +0100
@@ -2,29 +2,29 @@
 import os, logging
 
 BASE_DIR = os.path.dirname(os.path.abspath(__file__)).rstrip("/")+"/"
-BASE_URL = '/egonomy/'
-WEB_URL = 'http://localhost/'
+BASE_URL = '%(base_url)s'
+WEB_URL = '%(web_url)s'
 STATIC_URL = BASE_URL + 'static/site/'
 
 DEBUG = True
 
 DATABASES = {
     'default': {
-        'ENGINE': 'django.db.backends.postgresql_psycopg2', # Add 'postgresql_psycopg2', 'mysql', 'sqlite3' or 'oracle'.
-        'NAME': 'egonomy',                      # Or path to database file if using sqlite3.
+        'ENGINE': 'django.db.backends.%(db_engine)s', # Add 'postgresql_psycopg2', 'mysql', 'sqlite3' or 'oracle'.
+        'NAME': '%(db_name)s',                      # Or path to database file if using sqlite3.
         # The following settings are not used with sqlite3:
-        'USER': '',
-        'PASSWORD': '',
-        'HOST': 'localhost',                      # Empty for localhost through domain sockets or '127.0.0.1' for localhost through TCP.
-        'PORT': '5432',                      # Set to empty string for default.
+        'USER': '%(db_user)s',
+        'PASSWORD': '%(db_password)s',
+        'HOST': '%(db_host)s',                      # Empty for localhost through domain sockets or '127.0.0.1' for localhost through TCP.
+        'PORT': '%(db_port)d',                      # Set to empty string for default.
     }
 }
 
 HAYSTACK_CONNECTIONS = {
     'default': {
         'ENGINE': 'egonomy.search_indexes.backends.elasticsearch_backend.ElasticsearchSearchEngine',
-        'URL': 'http://127.0.0.1:9200/',
-        'INDEX_NAME': 'egonomy',
+        'URL': '%(haystack_url)s',
+        'INDEX_NAME': '%(haystack_index)s',
     },
 }
 
@@ -32,7 +32,7 @@
 # http://en.wikipedia.org/wiki/List_of_tz_zones_by_name
 # although not all choices may be available on all operating systems.
 # In a Windows environment this must be set to your system time zone.
-TIME_ZONE = 'America/Chicago'
+TIME_ZONE = 'Europe/Paris'
 
 # Language code for this installation. All choices can be found here:
 # http://www.i18nguy.com/unicode/language-identifiers.html
@@ -64,9 +64,10 @@
 
 
 # Make this unique, and don't share it with anybody.
-SECRET_KEY = 'abc123'
+SECRET_KEY = '%(secret_key)s'
 
-LOG_FILE = os.path.abspath(os.path.join(BASE_DIR,"../../run/log/log.txt"))
+#LOG_FILE = os.path.abspath(os.path.join(BASE_DIR,"../../run/log/log.txt"))
+LOG_FILE = '%(log_file)s'
 LOG_LEVEL = logging.DEBUG
 # A sample logging configuration. The only tangible logging
 # performed by this configuration is to send an email to
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/egonomy/management/commands/importPertimm.py	Thu Feb 21 18:31:59 2013 +0100
@@ -0,0 +1,158 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jan 31, 2013
+
+@author: ymh
+'''
+
+from ..utils import show_progress
+from django.core.management.base import BaseCommand
+from django.db import transaction
+from egonomy.models import ImageMetadata
+from optparse import make_option
+import csv
+import sys
+import datetime
+
+
+class Command(BaseCommand):
+    '''
+    Import rmn csv files
+    '''
+
+    args = 'csv_file csv_file ...'
+    help = 'Import rmn csv files'
+    
+    option_list = BaseCommand.option_list + (
+        make_option('-n', '--max-lines',
+            dest= 'max_lines',
+            type='int',
+            default= sys.maxint,
+            help= 'max number of lines to process (default: process all lines)' 
+        ),
+        make_option('-b', '--batch-size',
+            dest= 'batch_size',
+            type='int',
+            default= 5000,
+            help= 'number of object to import in bulk operations' 
+        ),
+        make_option('-e', '--encoding',
+            dest= 'encoding',
+            default= 'utf8',
+            help= 'csv files encoding' 
+        ),
+        make_option('--skip',
+            dest= 'skip',
+            type='int',
+            default= 0,
+            help= 'number of entry to skip' 
+        ),
+        make_option('--stop',
+            dest= 'cont',
+            action= 'store_false',
+            default= True,
+            help= 'stop on error' 
+        ),
+        make_option('-l', '--log',
+            dest= 'log',
+            default= 'log.txt',
+            help= 'log file' 
+        ),
+    )
+    
+    def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None):
+        val = dict_arg.get(key, default)
+        return conv(val) if val else default
+
+    def __safe_decode(self, s):
+        if not isinstance(s, basestring):
+            return s
+        try:
+            return s.decode('utf8')
+        except:
+            try:
+                return s.decode('latin1')
+            except:
+                return s.decode('utf8','replace')
+
+    def handle(self, *args, **options):
+                
+        max_lines = options.get('max_lines', sys.maxint)
+        csv_files_dialect = {}
+        skip = options.get('skip', 0)
+        # calculating the number of lines to process
+        print("calculating number of line to process")
+        total = 0
+        for csv_file_path in args:            
+            with open(csv_file_path,'rb') as csv_file:
+                dialect = csv.Sniffer().sniff(csv_file.read(1024))
+                dialect.doublequote = True
+                csv_files_dialect[csv_file_path] = dialect
+                csv_file.seek(0)
+                for _ in csv.DictReader(csv_file, dialect=dialect):
+                    total += 1
+                    if total > max_lines:
+                        break
+        
+        nb_lines = min(max_lines, total)
+        batch_size = options.get('batch_size', 5000)
+        
+        print("There is %d lines to process, starting processing now." % nb_lines)
+        counter = 0
+        writer = None
+        encoding = options.get('encoding', 'utf8')
+        log_path = options.get('log', "log.txt")
+        cont_on_error = options.get('cont', True)
+
+        transaction.enter_transaction_management()
+        transaction.managed()
+        try:        
+            for csv_file_path in args:
+                with open(csv_file_path,'rb') as csv_file:
+                    dialect = csv_files_dialect.get(csv_file_path,None)
+                    if not dialect:
+                        dialect = csv.Sniffer().sniff(csv_file.read(1024))
+                        dialect.doublequote = True
+                        csv_file.seek(0)
+                    
+                    dictreader = csv.DictReader(csv_file, dialect=dialect)
+                    thesaurus_fields = sorted([fname for fname in dictreader.fieldnames if fname.startswith("THESAURUS")], key=lambda name: int(name[len("THESAURUS_"):]))
+                                        
+                    for row in dictreader:
+                        try:
+                            counter += 1
+                            if counter <= skip:
+                                continue
+                            if counter > nb_lines:
+                                break
+                            urow = dict([(k, v.decode(encoding, 'replace') if v else v) for k,v in row.items()])
+                            writer = show_progress(counter, nb_lines, u"%s - %s - %d/%d" % (urow['CLICHE'], urow['TITRE'], counter%batch_size, batch_size), 80, writer)
+                                                                                        
+                            img_id = urow['CLICHE']
+                            
+                            thesaurus_pertimm = "|".join([urow[fname] for fname in thesaurus_fields if urow[fname]])
+                                                        
+                            ImageMetadata.objects.filter(id=img_id).update(titre_pertimm=urow["PERTIMM_TITRE"], description_pertimm=urow["PERTIMM_DESCRIPTION"], thesaurus_pertimm=thesaurus_pertimm)
+                            
+                        except Exception as e:                            
+                            error_msg = "%s - Error treating line %d, file %s local %d : id %s - title : %s : %s\n" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),counter, csv_file_path, dictreader.line_num, row['CLICHE'] if (row and 'CLICHE' in row and row['CLICHE']) else 'n/a', row['TITRE'] if (row and 'TITRE' in row and row['TITRE']) else 'n/a', repr(e) )
+                            with open(log_path, 'a') as log_file:
+                                log_file.write(error_msg)
+                            if not cont_on_error:
+                                raise
+                        
+                        
+                        if not (counter%batch_size):
+                            transaction.commit()
+                            
+                        
+                if counter > nb_lines:
+                    break
+            
+            transaction.commit()        
+        except:
+            transaction.rollback()            
+            raise
+        finally:
+            transaction.leave_transaction_management()
+            
\ No newline at end of file
--- a/src/egonomy/search_indexes/backends/elasticsearch_backend.py	Wed Feb 20 16:56:45 2013 +0100
+++ b/src/egonomy/search_indexes/backends/elasticsearch_backend.py	Thu Feb 21 18:31:59 2013 +0100
@@ -6,7 +6,7 @@
 '''
 
 from django.db.models.loading import get_model
-from egonomy.models import ImageMetadata, Fragment
+from egonomy.models import Fragment
 from haystack.backends import BaseEngine, SearchResult, elasticsearch_backend
 from haystack.constants import DJANGO_CT, DJANGO_ID
 import datetime
--- a/src/egonomy/search_indexes/indexes.py	Wed Feb 20 16:56:45 2013 +0100
+++ b/src/egonomy/search_indexes/indexes.py	Thu Feb 21 18:31:59 2013 +0100
@@ -25,6 +25,10 @@
     localization = indexes.CharField(model_attr='localisation', null=True)
     tags = indexes.CharField(model_attr='mots_cles', null=True)
     
+    title_p = indexes.CharField(model_attr="titre_pertimm", null=True)
+    description_p = indexes.CharField(model_attr="description_pertimm", null=True)
+    thesaurus_p = indexes.CharField(model_attr="thesaurus_pertimm", null=True)
+    
     def get_model(self):
         return ImageMetadata
 
--- a/src/egonomy/templates/search/indexes/egonomy/imagemetadata_text.txt	Wed Feb 20 16:56:45 2013 +0100
+++ b/src/egonomy/templates/search/indexes/egonomy/imagemetadata_text.txt	Thu Feb 21 18:31:59 2013 +0100
@@ -6,4 +6,7 @@
 {{ object.site }}
 {{ object.lieu }}
 {{ object.localisation }}
-{{ object.mots_cles }}
\ No newline at end of file
+{{ object.mots_cles }}
+{{ object.titre_pertimm }}
+{{ object.description_pertimm }}
+{{ object.thesaurus_pertimm }}
\ No newline at end of file