src/cm/converters/abi_converters.py
author gibus
Tue, 22 Oct 2013 15:00:56 +0200
changeset 555 5d79dc4e50a3
parent 457 f62f7f0bcaa4
permissions -rw-r--r--
When creating from uploaded file (in ms-word for eg.), try libroffice in case abiword fails.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
360
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
     1
import os
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
     2
import tempfile
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
     3
import re
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
     4
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
     5
import pexpect
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
     6
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
     7
from abi_error import AbiConverterError, AbiCommandError
457
f62f7f0bcaa4 Fixed export with abiword by running tidy on html
gibus
parents: 456
diff changeset
     8
from cm.converters.pandoc_converters import do_tidy
360
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
     9
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    10
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    11
# Input type names (they look like file extensions), each mapped to itself.
# NOTE(review): nothing in the visible part of this file consults this table;
# the class docstring says AbiWord does not check the import type.
TYPES_IN = dict((ext, ext) for ext in (
    '602', 'abw', 'aw', 'awt', 'cwk', 'dbk', 'doc', 'docm', 'docx',
    'dot', 'dotm', 'dotx', 'fo', 'htm', 'html', 'hwp', 'isc', 'iscii',
    'kwd', 'mif', 'odt', 'opml', 'ott', 'pdb', 'pdf', 'rtf', 'sdw',
    'stw', 'sxw', 'text', 'txt', 'wml', 'wp', 'wpd', 'wri', 'xhtml',
    'xml', 'zabw',
))
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    24
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    25
# Output type names accepted for conversion, each mapped to itself.
# convert_file() looks the requested type (or the output file extension) up
# in this table and falls back to 'txt' when it is not listed.
TYPES_OUT = dict((ext, ext) for ext in (
    'abw', 'aw', 'awt', 'dbk', 'doc', 'docx', 'eml', 'fo', 'html',
    'isc', 'iscii', 'kwd', 'latex', 'mht', 'mif', 'nroff', 'nws', 'odt',
    'pdb', 'pdf', 'ps', 'rtf', 'sxw', 'text', 'txt', 'wml', 'xml',
    'xml2ps', 'zabw',
))
360
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    35
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    36
class AbiFileConverter(object):
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    37
    """This lets you convert between all filetypes supported by the
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    38
    AbiWord program. Import type isn't checked, as AbiWord doesn't check 
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    39
    on extension, but on metadata.
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    40
    """
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    41
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    42
    def __init__(self, timeout=60):
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    43
        self.id = None
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    44
        self.timeout = timeout
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    45
        self._start_abiword()
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    46
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    47
    def _start_abiword(self):
        """
        Locate the abiword executable and start it with the AbiCommand plugin.

        The directories listed in the PATH environment variable are searched
        in order and the first executable file named 'abiword' is used. The
        spawned process is stored in `self.child`; no check is made for an
        already running instance, a previously stored child is simply
        replaced.

        Raises AbiConverterError if no executable is found, or if the process
        cannot be spawned or does not print the AbiCommand banner within
        10 seconds.
        """

        # find the abiword executable
        abicommand = None
        search_path = os.environ.get('PATH', os.defpath)
        for directory in search_path.split(os.pathsep):
            candidate = os.path.join(directory, 'abiword')
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                # first hit wins, as in a shell lookup
                abicommand = candidate
                break
        if not abicommand:
            raise AbiConverterError('Can not find abiword executable')

        # start the abiword executable
        try:
            # Arguments go in a list so that a path containing spaces is not
            # split by pexpect's command-line parsing.
            self.child = pexpect.spawn(abicommand, ['--plugin', 'AbiCommand'])
            self.child.expect(
                    'AbiWord command line plugin: Type "quit" to exit', 10)
        except Exception:
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit are not turned into a converter error.
            raise AbiConverterError('Can not open abiword executable')
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    67
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    68
    def stop_abiword(self):
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    69
        """
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    70
        Stop the running abiword, kill it if necessary
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    71
        """
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    72
        self.child.sendline('quit')
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    73
        if self._is_running():
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    74
            os.kill(self.child.pid, 9)
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    75
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    76
    def _is_running(self):
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    77
        """
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    78
        Test to see if abiword is running
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    79
        """
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    80
        try:
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    81
            self.child.sendline('writepid /dev/null')
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    82
            self.child.expect('OK', 1)
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    83
            return True
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    84
        except:
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    85
            return False
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    86
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
    87
    def convert_file(self, in_file, out_file=None, type=None):
        """
        Convert `in_file` with AbiWord.

        If `out_file` is given, the result is written there and None is
        returned. Otherwise the result goes to a temporary file whose content
        is returned as a byte string; the temporary file is removed
        afterwards, whether or not the conversion succeeded.

        The output type is `type` if given, otherwise the extension of
        `out_file`. A value that is not listed in TYPES_OUT falls back to
        'txt'.

        Errors are not signalled through the return value: whatever
        `_perform_conversion()` raises propagates to the caller.
        """
        # is the out_file specified?
        return_bytes = out_file is None
        if return_bytes:
            # mkstemp creates the file atomically, unlike the race-prone
            # mktemp; only the name is needed, so the descriptor is closed.
            handle, out_file = tempfile.mkstemp(prefix="abiconvert_")
            os.close(handle)

        try:
            # is the type specified
            out_type = TYPES_OUT.get(
                type or os.path.splitext(out_file)[1][1:], 'txt')

            # do the conversion
            self._perform_conversion(in_file, out_file, out_type)

            # return a byte string if no out_file is specified
            if return_bytes:
                # binary mode: the result may be a PDF, ODT, ...
                with open(out_file, 'rb') as fp:
                    return fp.read()
        finally:
            if return_bytes and os.path.exists(out_file):
                os.remove(out_file)
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   118
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   119
    def _perform_conversion(self, in_file, out_file, type):
        """
        Send a 'convert' command to abiword and wait for its 'OK'.

        `in_file` and `out_file` are made absolute before being sent;
        `type` is the AbiWord output type name. abiword is (re)started first
        if it no longer answers.

        Raises AbiCommandError if 'OK' is not seen within `self.timeout`
        seconds. Other pexpect errors (for instance EOF when the child dies)
        propagate unchanged.
        """
        # make sure we are up and running
        # (the method must be *called*: the bound method object itself is
        # always truthy, which used to disable the restart entirely)
        if not self._is_running():
            self._start_abiword()

        # convert the file
        cmd = 'convert %s %s %s' % (os.path.abspath(in_file),
                                    os.path.abspath(out_file), type)
        self.child.sendline(cmd)

        # Check for errors
        # With TIMEOUT in the pattern list, expect() returns its index
        # instead of raising, so a timeout shows up as a non-zero result.
        matched = self.child.expect(['OK', pexpect.TIMEOUT],
                                    timeout=self.timeout)
        if matched != 0:
            raise AbiCommandError('Error performing AbiCommand: %s' %cmd)
360
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   141
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   142
    def convert_to_html(self, input):
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   143
        """ 
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   144
        Convert input file to HTML
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   145
        """
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   146
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   147
        from tempfile import mkstemp,mkdtemp
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   148
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   149
        THE_OUTDIR = "outdir"
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   150
        THE_OUTFILE = "outfile"
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   151
        THE_INDIR = "indir"
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   152
        THE_INFILE = "infile"
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   153
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   154
        infile = None
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   155
        outfile = None
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   156
        out_f = None
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   157
        try:
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   158
          # create in/out files
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   159
          temp_dir = mkdtemp(prefix="cm_")
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   160
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   161
          # in
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   162
          indir_name = os.path.join(temp_dir, THE_INDIR)
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   163
          os.mkdir(indir_name)
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   164
          infile_name = os.path.join(indir_name, THE_INFILE)
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   165
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   166
          # out
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   167
          outdir_name = os.path.join(temp_dir, THE_OUTDIR)
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   168
          os.mkdir(outdir_name)
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   169
          outfile_name = os.path.join(outdir_name, THE_OUTFILE)
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   170
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   171
          # write infile 
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   172
          infile = open(infile_name,'w')
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   173
          if type(input) == unicode:
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   174
            input = input.encode('utf8')
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   175
          infile.write(input)
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   176
          infile.close()
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   177
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   178
          # fix perms
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   179
          # TODO group permission should suffice
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   180
          os.chmod(temp_dir, 0755) # read        
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   181
          os.chmod(indir_name, 0755) # read        
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   182
          os.chmod(infile_name, 0755) # read
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   183
          os.chmod(outdir_name, 0777) # read / write
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   184
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   185
          # Do the job
555
5d79dc4e50a3 When creating from uploaded file (in ms-word for eg.), try libroffice in case abiword fails.
gibus
parents: 457
diff changeset
   186
          try:
5d79dc4e50a3 When creating from uploaded file (in ms-word for eg.), try libroffice in case abiword fails.
gibus
parents: 457
diff changeset
   187
            self.convert_file(infile_name, outfile_name, 'html')
5d79dc4e50a3 When creating from uploaded file (in ms-word for eg.), try libroffice in case abiword fails.
gibus
parents: 457
diff changeset
   188
          except:
5d79dc4e50a3 When creating from uploaded file (in ms-word for eg.), try libroffice in case abiword fails.
gibus
parents: 457
diff changeset
   189
            raise
360
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   190
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   191
          out_f = open(outfile_name,'r')
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   192
          output = out_f.read()
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   193
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   194
          # load other files (useful only for html)
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   195
          img_res = [] 
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   196
          if os.path.isdir(outdir_name + '/' + THE_OUTFILE + '_files'):
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   197
            image_names = [name for name in os.listdir(outdir_name + '/' + THE_OUTFILE + '_files') if name != THE_OUTFILE]
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   198
            for image_name in image_names:
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   199
              img_res.append(os.path.join(outdir_name + '/' + THE_OUTFILE + '_files', image_name))
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   200
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   201
            # clean images paths
374
aa71c04f5832 semicolon useless in python
gibus
parents: 366
diff changeset
   202
            output = re.sub(r'<img(.+src=")outfile_files/([^"]+")', r'<img\1\2', output)
aa71c04f5832 semicolon useless in python
gibus
parents: 366
diff changeset
   203
            output = re.sub(r'<img(.+)style="width:[\d\.]+mm"', r'<img\1', output)
360
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   204
          return output,img_res
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   205
555
5d79dc4e50a3 When creating from uploaded file (in ms-word for eg.), try libroffice in case abiword fails.
gibus
parents: 457
diff changeset
   206
        except Exception as inst:
5d79dc4e50a3 When creating from uploaded file (in ms-word for eg.), try libroffice in case abiword fails.
gibus
parents: 457
diff changeset
   207
          pass
5d79dc4e50a3 When creating from uploaded file (in ms-word for eg.), try libroffice in case abiword fails.
gibus
parents: 457
diff changeset
   208
360
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   209
        finally:
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   210
          try:
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   211
            if out_f:
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   212
                out_f.close()
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   213
            if infile:
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   214
                infile.close()
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   215
          except:
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   216
            pass
555
5d79dc4e50a3 When creating from uploaded file (in ms-word for eg.), try libroffice in case abiword fails.
gibus
parents: 457
diff changeset
   217
          if inst:
5d79dc4e50a3 When creating from uploaded file (in ms-word for eg.), try libroffice in case abiword fails.
gibus
parents: 457
diff changeset
   218
            raise inst
360
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   219
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   220
    def convert_from_html(self, input, format):
        """
        Convert an HTML document to another format.

        The HTML is written to a scratch input file inside a fresh temporary
        directory, handed to ``self.convert_file`` together with an output
        path and ``format``, and the content of the produced output file is
        returned.  The temporary directory is removed afterwards, whether the
        conversion succeeded or not.

        input  -- HTML source; a ``unicode`` value is encoded as UTF-8 before
                  being written, any other value is written as is.
        format -- target format, passed unchanged to ``self.convert_file``.

        Returns the raw content of the converted file.  Exceptions raised
        while preparing the files, converting or reading the result are
        propagated to the caller.
        """
        import shutil
        from tempfile import mkdtemp

        THE_OUTDIR = "outdir"
        THE_OUTFILE = "outfile"
        THE_INDIR = "indir"
        THE_INFILE = "infile"

        # Created outside the ``try`` so that the cleanup below only runs
        # when there really is a directory to remove.
        temp_dir = mkdtemp(prefix="cm_")
        try:
            # in
            indir_name = os.path.join(temp_dir, THE_INDIR)
            os.mkdir(indir_name)
            infile_name = os.path.join(indir_name, THE_INFILE + '.html')

            # out
            outdir_name = os.path.join(temp_dir, THE_OUTDIR)
            os.mkdir(outdir_name)
            outfile_name = os.path.join(outdir_name, THE_OUTFILE)

            # write infile (binary mode: the data is already encoded bytes)
            if isinstance(input, unicode):
                input = input.encode('utf8')
            with open(infile_name, 'wb') as infile:
                infile.write(input)

            # fix perms
            # TODO group permission should suffice
            # NOTE(review): presumably the converter may run as another
            # user, hence the world-accessible modes -- confirm.
            os.chmod(temp_dir, 0o755)     # read
            os.chmod(indir_name, 0o755)   # read
            os.chmod(infile_name, 0o755)  # read
            os.chmod(outdir_name, 0o777)  # read / write

            # Do the job
            self.convert_file(infile_name, outfile_name, format)

            # The result may be a binary document, so read it untranslated.
            with open(outfile_name, 'rb') as out_f:
                return out_f.read()
        finally:
            # Best-effort removal of the whole scratch tree; a failure to
            # clean up must never mask the result or the original error.
            shutil.rmtree(temp_dir, ignore_errors=True)
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   285
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   286
    def add_html_header(self, body):
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   287
        """ 
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   288
        Add an HTML header to an HTML body
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   289
        """
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   290
454
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   291
        if '<html' in body and '<body' in body:
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   292
          full_html = body
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   293
        else:
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   294
          full_html = """
360
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   295
<html xmlns="http://www.w3.org/1999/xhtml">
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   296
    <head>
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   297
        <meta http-equiv="content-type" content="text/html; charset=utf-8" />
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   298
    </head>
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   299
    <body>
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   300
        %s
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   301
    </body>
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   302
</html>
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   303
""" %body
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents:
diff changeset
   304
454
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   305
        # Adds some style to fix Abiword default margins for paragraphs.
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   306
        from BeautifulSoup import BeautifulSoup
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   307
        import cssutils
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   308
        soup = BeautifulSoup(full_html)
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   309
        for p in soup.findAll(['p', 'div', 'ul', 'ol', 'dl']):
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   310
          try:
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   311
            css = p['style']
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   312
            s = cssutils.parseStyle(css)
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   313
            if s.getProperty('margin') == None:
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   314
              if s.getProperty('margin-top') == None:
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   315
                s.setProperty('margin-top', '10pt')
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   316
              if s.getProperty('margin-bottom') == None:
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   317
                s.setProperty('margin-bottom', '10pt')
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   318
            p['style'] = s.cssText
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   319
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   320
          except KeyError:
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   321
            p['style'] = 'margin-top: 10pt; margin-bottom: 10pt;';
b7a092a52eae Cleaned export.
gibus
parents: 442
diff changeset
   322
457
f62f7f0bcaa4 Fixed export with abiword by running tidy on html
gibus
parents: 456
diff changeset
   323
        return do_tidy(unicode(soup))