--- a/README.txt Thu Jun 16 09:27:56 2011 +0200
+++ b/README.txt Thu Jul 21 09:42:33 2011 +0200
@@ -23,7 +23,8 @@
-------------
- Postgresql 8.3 or Mysql 5+ or sqlite
- Python 2.5+
-- Openoffice 3.0+ (headless) & Pandoc
+- Abiword or Openoffice 3.0+ (headless)
+- Pandoc
Requirements
@@ -32,9 +33,8 @@
- python magic
- python development headers
- python setuptools
-- python uno
- pandoc
-- headless openoffice
+- abiword (or headless openoffice and python uno)
- git
- libyaml
(all other python dependencies will be downloaded by buildout)
@@ -45,7 +45,9 @@
(ubuntu users : 'sudo apt-get install python python-magic python-setuptools python-uno libyaml-0-1 python-yaml python-dev git-core python-utidylib')
2. Install pandoc
(ubuntu users : 'sudo apt-get install pandoc')
-3. Install openoffice (headless mode) [used for document conversion]
+3. Install abiword
+ (ubuntu users: 'sudo apt-get install abiword')
+ Alternatively, install openoffice (headless mode) [used for document conversion]
(ubuntu users : 'sudo apt-get install sun-java6-jre openoffice.org openoffice.org-headless xvfb')
4. Install/configure database [skip this step if you plan to use a sqlite database]
4 a) Postgresql
@@ -105,11 +107,11 @@
- `./bin/django migrate cm 0001_initial --fake`
- `./bin/django migrate`
-Openoffice
-==========
-Comt uses openoffice to convert documents from ODT, MS Word, etc. to html.
-On a development setup, you should make sure no openoffice process is left and launch
-`soffice -headless "-accept=socket,port=2002;urp;"` to start openoffice in background mode.
+Abiword or Openoffice
+=====================
+Comt uses either abiword or openoffice to convert documents from ODT, MS Word, etc. to html.
+Abiword is the lighter and faster solution. To use it, add the configuration parameter `USE_ABI = True` to your settings_local.py; otherwise openoffice is used.
+To use openoffice on a development setup, make sure no openoffice process is left running, then launch `soffice -headless "-accept=socket,port=2002;urp;"` to start openoffice in background mode.
Comt uses
============
@@ -141,8 +143,24 @@
FAQ
====
-Q: I get 'import error' when starting the server (step #9)
-R: Make sure you installed all required python dependencies
+Q1: How can I check the distribution for errors (libraries etc.)?
+R1: After configuring a database and access in your settings_local.py, you can launch the unit test suite with the following command: `./bin/django test cm`
+
+Q2: I'm getting the following error when launching the migrate command:
+`
+line 62, in handle
+ __import__(app_name + '.management', {}, {}, [''])
+ File "/usr/lib/python2.5/site-packages/uno.py", line 300, in _uno_import
+ raise ImportError( "type "+ name + "." +x + " is unknown" )
+ImportError: type django.contrib.sessions.management. is unknown
+`
+R2: This is due to a bug in uno (the python openoffice bridge), which monkey patches the import system and interferes with django's dynamic module loading. As a workaround, set `UNO_IMPORT = False` in the file src/cm/converters/oo_converters.py, then launch the migrate command. Afterwards, set the value back to True and relaunch the server to use openoffice as a conversion backend.
+
+Q3: When using the co-ment Drupal module, I want the names of commentators to be the same as their Drupal usernames
+R3: For this feature (commentator name = drupal login name) to be available, a configuration parameter should be set in settings_local.py: `DECORATED_CREATORS = True`
+
+Q4: I get 'import error' when starting the server (step #9)
+R4: Make sure you installed all required python dependencies
Community
=========
--- a/src/cm/cm_settings.py Thu Jun 16 09:27:56 2011 +0200
+++ b/src/cm/cm_settings.py Thu Jul 21 09:42:33 2011 +0200
@@ -31,4 +31,7 @@
STORE_ACTIVITY_IP = get_setting('STORE_ACTIVITY_IP', True)
# Show 'decorated' users in comments (not structural creator id)
-DECORATED_CREATORS = get_setting('DECORATED_CREATORS', False)
\ No newline at end of file
+DECORATED_CREATORS = get_setting('DECORATED_CREATORS', False)
+
+# Use AbiWord for conversions
+USE_ABI = get_setting('USE_ABI', False)
--- a/src/cm/converters/__init__.py Thu Jun 16 09:27:56 2011 +0200
+++ b/src/cm/converters/__init__.py Thu Jul 21 09:42:33 2011 +0200
@@ -3,7 +3,7 @@
from cm.utils.string_utils import to_unicode
import re
import os
-from cm.converters.oo_converters import extract_css_body
+from oo_converters import extract_css_body
# TODO: move that in text_base: save images
@@ -18,16 +18,26 @@
attachs = []
attachs_dir = None
##############################
+ # OO/MS-Word
if mime_type in ['application/vnd.oasis.opendocument.text',
'application/msword',
+ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
]:
- html_input, xhtml_input, attachs = convert_oo_to_html_and_xhtml(input)
- if format == 'html':
+ from cm.cm_settings import USE_ABI
+ if USE_ABI:
+ from abi_converters import AbiFileConverter
+ converter = AbiFileConverter()
+ html_input, attachs = converter.convert_to_html(input)
+ html_input = re.sub(r' awml:style="[^"]*"', '', html_input)
+ converted_input = pandoc_convert(html_input, 'html', format)
+ else:
+ html_input, xhtml_input, attachs = convert_oo_to_html_and_xhtml(input)
+ if format == 'html':
_not_used_css, converted_input = extract_css_body(xhtml_input)
#converted_input = xhtml_input
- converted_input = pandoc_convert(html_input, 'html', format)
+ converted_input = pandoc_convert(html_input, 'html', format)
##############################
# latex
@@ -136,4 +146,4 @@
CODE_INDICATOR = " " # 4 spaces
return '\n'.join([CODE_INDICATOR + line for line in code.split('\n')])
-
\ No newline at end of file
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cm/converters/abi_converters.py Thu Jul 21 09:42:33 2011 +0200
@@ -0,0 +1,283 @@
+import os
+import tempfile
+import re
+
+import pexpect
+
+from abi_error import AbiConverterError, AbiCommandError
+
+
+# File-type names for the input side; every name maps to itself.
+# NOTE(review): presumably the formats AbiWord can import -- this table is
+# not referenced by the code visible in this changeset.
+_IN_NAMES = (
+    '602', 'abw', 'aw', 'awt', 'cwk', 'dbk', 'doc', 'docm', 'docx',
+    'dot', 'dotm', 'dotx', 'fo', 'htm', 'html', 'hwp', 'isc', 'iscii',
+    'kwd', 'mif', 'odt', 'opml', 'ott', 'pdb', 'pdf', 'rtf', 'sdw',
+    'stw', 'sxw', 'text', 'txt', 'wml', 'wp', 'wpd', 'wri', 'xhtml',
+    'xml', 'zabw',
+)
+TYPES_IN = dict((name, name) for name in _IN_NAMES)
+
+# Output type names accepted by AbiFileConverter.convert_file; every name
+# maps to itself, and anything not listed here falls back to 'txt' there.
+_OUT_NAMES = (
+    'abw', 'aw', 'awt', 'dbk', 'doc', 'eml', 'fo', 'html', 'isc',
+    'iscii', 'kwd', 'latex', 'mht', 'mif', 'nroff', 'nws', 'odt', 'pdb',
+    'pdf', 'ps', 'rtf', 'sxw', 'text', 'txt', 'wml', 'xml', 'xml2ps',
+    'zabw',
+)
+TYPES_OUT = dict((name, name) for name in _OUT_NAMES)
+
+class AbiFileConverter(object):
+    """Convert documents between the file types supported by AbiWord.
+
+    The work is delegated to an ``abiword`` child process started with the
+    AbiCommand plugin and driven through pexpect. The import type isn't
+    checked, as AbiWord doesn't check on extension, but on metadata.
+    """
+
+    # names used inside the temporary directory created for each conversion
+    _INDIR = "indir"
+    _INFILE = "infile"
+    _OUTDIR = "outdir"
+    _OUTFILE = "outfile"
+
+    def __init__(self, timeout=60):
+        """
+        Start the abiword child process.
+
+        timeout -- seconds to wait for abiword to acknowledge a conversion.
+        Raises AbiConverterError if abiword can not be found or started.
+        """
+        self.id = None
+        self.timeout = timeout
+        self._start_abiword()
+
+    def _start_abiword(self):
+        """
+        Start abiword with the AbiCommand plugin and keep it in self.child.
+
+        Raises AbiConverterError if no abiword executable is found on PATH
+        or if the plugin banner does not show up within 10 seconds.
+        """
+
+        # find the abiword executable: the first match on PATH wins, as in
+        # a shell lookup
+        abicommand = None
+        for directory in os.environ.get('PATH', '').split(os.pathsep):
+            candidate = os.path.join(directory, 'abiword')
+            if os.path.isfile(candidate):
+                abicommand = candidate
+                break
+        if not abicommand:
+            raise AbiConverterError('Can not find abiword executable')
+
+        # start the abiword executable
+        try:
+            self.child = pexpect.spawn(abicommand + ' --plugin AbiCommand')
+            self.child.expect(
+                'AbiWord command line plugin: Type "quit" to exit', 10)
+        except Exception:
+            # `except Exception` (not a bare except) lets KeyboardInterrupt
+            # and SystemExit through
+            raise AbiConverterError('Can not open abiword executable')
+
+    def stop_abiword(self):
+        """
+        Stop the running abiword, kill it if necessary
+        """
+        try:
+            self.child.sendline('quit')
+        except Exception:
+            # the child is already gone, so there is nobody left to ask
+            pass
+        if self._is_running():
+            self._kill_child()
+
+    def _kill_child(self):
+        """
+        Send SIGKILL (9) to the current child, ignoring a missing process
+        """
+        try:
+            os.kill(self.child.pid, 9)
+        except (OSError, AttributeError):
+            # already exited, or no child was ever started
+            pass
+
+    def _is_running(self):
+        """
+        Test to see if abiword is running
+
+        Sends a `writepid /dev/null` command and returns True only if the
+        plugin answers 'OK' within one second.
+        """
+        try:
+            self.child.sendline('writepid /dev/null')
+            self.child.expect('OK', 1)
+        except Exception:
+            return False
+        return True
+
+    def _remove_tree(self, path):
+        """
+        Best-effort recursive removal of a temporary directory
+        """
+        import shutil  # local import: only needed for clean-up
+        shutil.rmtree(path, ignore_errors=True)
+
+    def convert_file(self, in_file, out_file=None, type=None):
+        """
+        Convert a file. If out_file is not specified, the converted content
+        is returned as a byte string; otherwise the result is written to
+        out_file and None is returned. If type is not specified, the file
+        extension from out_file is used to determine the type. If this
+        fails, the type 'txt' is used.
+        Raises AbiCommandError if abiword does not confirm the conversion.
+        """
+        # is the out_file specified? If not, convert into a private scratch
+        # directory (mkdtemp) instead of a guessable mktemp() name
+        scratch_dir = None
+        if out_file is None:
+            scratch_dir = tempfile.mkdtemp(prefix="abiconvert_")
+            out_file = os.path.join(scratch_dir, 'out')
+
+        try:
+            # is the type specified
+            type = TYPES_OUT.get(
+                type or os.path.splitext(out_file)[1][1:], 'txt')
+
+            # do the conversion
+            self._perform_conversion(in_file, out_file, type)
+
+            # return a byte string if no out_file is specified
+            if scratch_dir is not None:
+                fp = open(out_file, 'rb')
+                try:
+                    return fp.read()
+                finally:
+                    fp.close()
+        finally:
+            if scratch_dir is not None:
+                self._remove_tree(scratch_dir)
+
+    def _perform_conversion(self, in_file, out_file, type):
+        """
+        Do the actual conversion
+
+        Raises AbiCommandError if abiword does not answer 'OK' within
+        self.timeout seconds or exits while converting.
+        """
+        # make sure we are up and running. The method has to be *called*:
+        # the bare attribute is always true, so a restart never happened.
+        if not self._is_running():
+            self._kill_child()  # do not leave an unresponsive child behind
+            self._start_abiword()
+
+        # convert the file
+        # NOTE(review): paths are sent unquoted; presumably they must not
+        # contain whitespace -- confirm against the AbiCommand syntax
+        cmd = 'convert %s %s %s' % (os.path.abspath(in_file),
+                                    os.path.abspath(out_file), type)
+        self.child.sendline(cmd)
+
+        # Check for errors: a timeout or abiword exiting both count
+        i = self.child.expect(['OK', pexpect.TIMEOUT, pexpect.EOF],
+                              self.timeout)
+        if i != 0:
+            raise AbiCommandError('Error performing AbiCommand: %s' %cmd)
+
+    def _create_workspace(self, input, infile_basename):
+        """
+        Create a temporary directory holding the input file and an empty
+        output directory.
+
+        Returns (temp_dir, infile_name, outdir_name). The caller owns
+        temp_dir and is responsible for removing it.
+        """
+        temp_dir = tempfile.mkdtemp(prefix="cm_")
+        try:
+            # in
+            indir_name = os.path.join(temp_dir, self._INDIR)
+            os.mkdir(indir_name)
+            infile_name = os.path.join(indir_name, infile_basename)
+
+            # out
+            outdir_name = os.path.join(temp_dir, self._OUTDIR)
+            os.mkdir(outdir_name)
+
+            # write infile; isinstance also catches unicode subclasses,
+            # which `type(input) == unicode` let through unencoded
+            if isinstance(input, unicode):
+                input = input.encode('utf8')
+            infile = open(infile_name, 'wb')
+            try:
+                infile.write(input)
+            finally:
+                infile.close()
+
+            # fix perms
+            # TODO group permission should suffice
+            os.chmod(temp_dir, 0755) # read
+            os.chmod(indir_name, 0755) # read
+            os.chmod(infile_name, 0755) # read
+            os.chmod(outdir_name, 0777) # read / write
+        except Exception:
+            self._remove_tree(temp_dir)
+            raise
+        return temp_dir, infile_name, outdir_name
+
+    def convert_to_html(self, input):
+        """
+        Convert input file to HTML
+
+        input -- content of the document (byte string or unicode).
+        Returns (html, images): the HTML written by abiword, with the
+        'outfile_files/' prefix and millimetre widths stripped from <img>
+        tags, and the list of paths of the files found in the
+        'outfile_files' directory. Those paths live in the temporary
+        directory, which is therefore left in place on success.
+        """
+        temp_dir, infile_name, outdir_name = self._create_workspace(
+            input, self._INFILE)
+        try:
+            outfile_name = os.path.join(outdir_name, self._OUTFILE)
+
+            # Do the job
+            self.convert_file(infile_name, outfile_name, 'html')
+
+            out_f = open(outfile_name, 'r')
+            try:
+                output = out_f.read()
+            finally:
+                out_f.close()
+
+            # load other files (useful only for html)
+            files_dir = os.path.join(outdir_name, self._OUTFILE + '_files')
+            img_res = []
+            if os.path.isdir(files_dir):
+                img_res = [os.path.join(files_dir, name)
+                           for name in os.listdir(files_dir)
+                           if name != self._OUTFILE]
+        except Exception:
+            # nothing will be handed back, so do not leak the directory
+            self._remove_tree(temp_dir)
+            raise
+
+        # clean images paths; `[^>]*?` keeps each match inside one <img>
+        # tag, where a greedy `.+` ran on to later tags on the same line
+        output = re.sub(r'<img([^>]*?src=")outfile_files/([^"]+")', r'<img\1\2', output)
+        output = re.sub(r'<img([^>]*?)style="width:[\d\.]+mm"', r'<img\1', output)
+        return output, img_res
+
+    def convert_from_html(self, input, format):
+        """
+        Convert input file from HTML
+
+        input -- HTML content (byte string or unicode).
+        format -- output type name, resolved through TYPES_OUT by
+        convert_file.
+        Returns the converted document as a byte string.
+        """
+        temp_dir, infile_name, outdir_name = self._create_workspace(
+            input, self._INFILE + '.html')
+        try:
+            outfile_name = os.path.join(outdir_name, self._OUTFILE)
+
+            # Do the job
+            self.convert_file(infile_name, outfile_name, format)
+
+            out_f = open(outfile_name, 'rb')
+            try:
+                return out_f.read()
+            finally:
+                out_f.close()
+        finally:
+            # the result is already in memory: drop the temporary files
+            self._remove_tree(temp_dir)
+
+    def add_html_header(self, body):
+        """
+        Add an HTML header to an HTML body
+
+        Returns body wrapped in an XHTML document declaring utf-8 content.
+        """
+
+        return """
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8" />
+ </head>
+ <body>
+ %s
+ </body>
+</html>
+""" % (body,)
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cm/converters/abi_error.py Thu Jul 21 09:42:33 2011 +0200
@@ -0,0 +1,9 @@
+
+class AbiConverterError(Exception):
+ """Raised by AbiFileConverter when the abiword executable can not be found or started."""
+ pass
+
+class AbiCommandError(Exception):
+ """Raised by AbiFileConverter when an AbiCommand `convert` command is not acknowledged with 'OK'."""
+ pass
+
+class ToolsConverterError(Exception):
+ """Converter error type; not raised by any code visible in this changeset."""
+ pass
--- a/src/cm/main.py Thu Jun 16 09:27:56 2011 +0200
+++ b/src/cm/main.py Thu Jul 21 09:42:33 2011 +0200
@@ -12,4 +12,4 @@
# add ch to logger
logger.addHandler(ch)
-logger_config()
\ No newline at end of file
+logger_config()
--- a/src/cm/utils/comment_positioning.py Thu Jun 16 09:27:56 2011 +0200
+++ b/src/cm/utils/comment_positioning.py Thu Jul 21 09:42:33 2011 +0200
@@ -127,7 +127,7 @@
if with_markers:
end_ids.reverse()
- ret = "%s%s%s"%(''.join(["[%s>"%start_id for start_id in start_ids]), ret, ''.join(["<%s]"%end_id for end_id in end_ids]))
+ ret = "%s%s%s"%(''.join(["[%s>"%start_id for start_id in start_ids]), ret, ''.join(["<%s]"%end_id for end_id in end_ids]))
if with_colors and color != 0 :
ret = "<span style='background-color:%s;'>%s</span>"%(BCKCOLORS[color], ret)
@@ -243,4 +243,4 @@
# top_comment_cpt = top_comment_cpt + 1
#
# ret = "%s%s%s"%("""<div class="pagebreakhere">""", html_comments, """</div>""")
-# return ret
\ No newline at end of file
+# return ret
--- a/src/cm/views/export.py Thu Jun 16 09:27:56 2011 +0200
+++ b/src/cm/views/export.py Thu Jul 21 09:42:33 2011 +0200
@@ -8,6 +8,7 @@
from cm.models import Text, TextVersion, Attachment, Comment
import mimetypes
import simplejson
+from cm.cm_settings import USE_ABI
EXPORT2_INFOS = {
# key -> { mimetype, extension}
's5' : {},
@@ -34,10 +35,20 @@
else :
fix_content = content
if content_format == 'html':
- from cm.converters.oo_converters import combine_css_body
- fix_content = combine_css_body(content, '')
- from cm.converters.oo_converters import convert_html as oo_convert
- export_content = oo_convert(fix_content, format)
+ if USE_ABI:
+ from cm.converters.abi_converters import AbiFileConverter
+ converter = AbiFileConverter()
+ fix_content = converter.add_html_header(content)
+ else:
+ from cm.converters.oo_converters import combine_css_body
+ fix_content = combine_css_body(content, '')
+ if USE_ABI:
+ from cm.converters.abi_converters import AbiFileConverter
+ converter = AbiFileConverter()
+ export_content = converter.convert_from_html(fix_content, format)
+ else:
+ from cm.converters.oo_converters import convert_html as oo_convert
+ export_content = oo_convert(fix_content, format)
export_infos = EXPORT2_INFOS[format]
@@ -189,4 +200,4 @@
return content_export(request, text_version.content, text_version.title, text_version.format, format)
def text_feed(request, key):
- return ""
\ No newline at end of file
+ return ""