src/cm/converters/abi_converters.py
changeset 360 bfaab8740995
child 361 5f2a1237050a
equal deleted inserted replaced
359:0bab4ef95bfe 360:bfaab8740995
       
     1 import os
       
     2 import tempfile
       
     3 import re
       
     4 
       
     5 import pexpect
       
     6 
       
     7 from abi_error import AbiConverterError, AbiCommandError
       
     8 
       
     9 
       
    10 TYPES_IN  = {'602': '602',       'abw': 'abw',       'aw': 'aw',     
       
    11              'awt': 'awt',       'cwk': 'cwk',       'dbk': 'dbk',   
       
    12              'doc': 'doc',       'docm': 'docm',     'docx': 'docx', 
       
    13              'dot': 'dot',       'dotm': 'dotm',     'dotx': 'dotx',
       
    14              'fo': 'fo',         'htm': 'htm',       'html': 'html', 
       
    15              'hwp': 'hwp',       'isc': 'isc',       'iscii': 'iscii',   
       
    16              'kwd': 'kwd',       'mif': 'mif',       'odt': 'odt',
       
    17              'opml': 'opml',     'ott': 'ott',       'pdb': 'pdb',
       
    18              'pdf': 'pdf',       'rtf': 'rtf',       'sdw': 'sdw',
       
    19              'stw': 'stw',       'sxw': 'sxw',       'text': 'text',
       
    20              'txt': 'txt',       'wml': 'wml',       'wp': 'wp',
       
    21              'wpd': 'wpd',       'wri': 'wri',       'xhtml': 'xhtml',
       
    22              'xml': 'xml',       'zabw': 'zabw'}
       
    23 
       
    24 TYPES_OUT = {'abw': 'abw',       'aw': 'aw',         'awt': 'awt',
       
    25              'dbk': 'dbk',       'doc': 'doc',       'eml': 'eml',
       
    26              'fo': 'fo',         'html': 'html',     'isc': 'isc',
       
    27              'iscii': 'iscii',   'kwd': 'kwd',       'latex': 'latex',
       
    28              'mht': 'mht',       'mif': 'mif',       'nroff': 'nroff',
       
    29              'nws': 'nws',       'odt': 'odt',       'pdb': 'pdb',
       
    30              'pdf': 'pdf',       'ps': 'ps',         'rtf': 'rtf',
       
    31              'sxw': 'sxw',       'text': 'text',     'txt': 'txt',
       
    32              'wml': 'wml',       'xml': 'xml',       'xml2ps': 'xml2ps',
       
    33              'zabw': 'zabw'}
       
    34 
       
    35 class AbiFileConverter(object):
       
    36     """This let's you convert between all filetypes supperted by the 
       
    37     AbiWord program. Import type isn't checked, as AbiWord doesn't check 
       
    38     on extension, but on metadata.
       
    39     """
       
    40 
       
    41     def __init__(self, timeout=60):
       
    42         self.id = None
       
    43         self.timeout = timeout
       
    44         self._start_abiword()
       
    45 
       
    46     def _start_abiword(self):
       
    47         """
       
    48         Start abiword with the AbiCommand plugin, if not already started
       
    49         """
       
    50 
       
    51         # find the abiword executable
       
    52         abicommand = None
       
    53         for dir in os.environ['PATH'].split(':'):
       
    54             if os.path.isfile(os.path.join(dir, 'abiword')):
       
    55                 abicommand = os.path.join(dir, 'abiword')
       
    56         if not abicommand:
       
    57             raise AbiConverterError('Can not find abiword executable')
       
    58 
       
    59         # start the abiword executable
       
    60         try:
       
    61             self.child = pexpect.spawn(abicommand + ' --plugin AbiCommand')
       
    62             self.child.expect(
       
    63                     'AbiWord command line plugin: Type "quit" to exit', 10)
       
    64         except:
       
    65             raise AbiConverterError('Can not open abiword executable')
       
    66 
       
    67     def stop_abiword(self):
       
    68         """
       
    69         Stop the running abiword, kill it if necessary
       
    70         """
       
    71         self.child.sendline('quit')
       
    72         if self._is_running():
       
    73             os.kill(self.child.pid, 9)
       
    74 
       
    75     def _is_running(self):
       
    76         """
       
    77         Test to see if abiword is running
       
    78         """
       
    79         try:
       
    80             self.child.sendline('writepid /dev/null')
       
    81             self.child.expect('OK', 1)
       
    82             return True
       
    83         except:
       
    84             return False
       
    85 
       
    86     def convert_file(self, in_file, out_file=None, type=None):
       
    87         """
       
    88         Convert a file. If out_file is not specified, a byte string is 
       
    89         returned. If type is not specified, the file extension from out_file is
       
    90         used to determine the type. If this fails, the type 'text' is used.
       
    91         Return value is -1 if an error occurred.
       
    92         """
       
    93         # is the out_file specified?
       
    94         return_bytes = False
       
    95         if out_file is None:
       
    96             out_file = tempfile.mktemp(prefix="abiconvert_")
       
    97             return_bytes = True
       
    98             
       
    99         # is the type specified
       
   100         type = TYPES_OUT.get(
       
   101             type or os.path.splitext(out_file)[1][1:], 'txt')
       
   102 
       
   103         # do the coversion
       
   104         self._perform_conversion(in_file, out_file, type)
       
   105 
       
   106         # return a byte string if no out_file is specified
       
   107         if return_bytes:
       
   108             fp = open(out_file,  'r')
       
   109             bytes = fp.read()
       
   110             fp.close()
       
   111             os.remove(out_file)
       
   112             return bytes
       
   113 
       
   114     def _perform_conversion(self, in_file, out_file, type):
       
   115         """
       
   116         Do the actual conversion
       
   117         """
       
   118         # make sure we are up and running 
       
   119         if not self._is_running:
       
   120             self._start_abiword()
       
   121 
       
   122         # convert the file
       
   123         cmd = 'convert %s %s %s' % (os.path.abspath(in_file), 
       
   124                                     os.path.abspath(out_file), type)
       
   125         self.child.sendline(cmd)
       
   126 
       
   127         # Check for errors
       
   128         i = self.child.expect(['OK', pexpect.TIMEOUT])
       
   129         if i != 0:
       
   130             raise AbiCommandError('Error performing AbiCommand: %s' %cmd)
       
   131 
       
   132     def convert_to_html(self, input):
       
   133         """ 
       
   134         Convert input file to HTML
       
   135         """
       
   136 
       
   137         from tempfile import mkstemp,mkdtemp
       
   138 
       
   139         THE_OUTDIR = "outdir"
       
   140         THE_OUTFILE = "outfile"
       
   141         THE_INDIR = "indir"
       
   142         THE_INFILE = "infile"
       
   143 
       
   144         infile = None
       
   145         outfile = None
       
   146         out_f = None
       
   147         try:
       
   148           # create in/out files
       
   149           temp_dir = mkdtemp(prefix="cm_")
       
   150 
       
   151           # in
       
   152           indir_name = os.path.join(temp_dir, THE_INDIR)
       
   153           os.mkdir(indir_name)
       
   154           infile_name = os.path.join(indir_name, THE_INFILE)
       
   155 
       
   156           # out
       
   157           outdir_name = os.path.join(temp_dir, THE_OUTDIR)
       
   158           os.mkdir(outdir_name)
       
   159           outfile_name = os.path.join(outdir_name, THE_OUTFILE)
       
   160 
       
   161           # write infile 
       
   162           infile = open(infile_name,'w')
       
   163           if type(input) == unicode:
       
   164             input = input.encode('utf8')
       
   165           infile.write(input)
       
   166           infile.close()
       
   167 
       
   168           # fix perms
       
   169           # TODO group permission should suffice
       
   170           os.chmod(temp_dir, 0755) # read        
       
   171           os.chmod(indir_name, 0755) # read        
       
   172           os.chmod(infile_name, 0755) # read
       
   173           os.chmod(outdir_name, 0777) # read / write
       
   174 
       
   175           # Do the job
       
   176           self.convert_file(infile_name, outfile_name, 'html')
       
   177 
       
   178           out_f = open(outfile_name,'r')
       
   179           output = out_f.read()
       
   180 
       
   181           # load other files (useful only for html)
       
   182           img_res = [] 
       
   183           if os.path.isdir(outdir_name + '/' + THE_OUTFILE + '_files'):
       
   184             image_names = [name for name in os.listdir(outdir_name + '/' + THE_OUTFILE + '_files') if name != THE_OUTFILE]
       
   185             for image_name in image_names:
       
   186               img_res.append(os.path.join(outdir_name + '/' + THE_OUTFILE + '_files', image_name))
       
   187 
       
   188             # clean images paths
       
   189             output = re.sub(r'<img(.+src=")outfile_files/([^"]+")', r'<img\1\2', output);
       
   190             output = re.sub(r'<img(.+)style="width:[\d\.]+mm"', r'<img\1', output);
       
   191           return output,img_res
       
   192 
       
   193         finally:
       
   194           try:
       
   195             if out_f:
       
   196                 out_f.close()
       
   197             if infile:
       
   198                 infile.close()
       
   199           except:
       
   200             pass
       
   201 
       
   202     def convert_from_html(self, input, format):
       
   203         """ 
       
   204         Convert input file from HTML
       
   205         """
       
   206 
       
   207         from tempfile import mkstemp,mkdtemp
       
   208 
       
   209         THE_OUTDIR = "outdir"
       
   210         THE_OUTFILE = "outfile"
       
   211         THE_INDIR = "indir"
       
   212         THE_INFILE = "infile"
       
   213 
       
   214         infile = None
       
   215         outfile = None
       
   216         out_f = None
       
   217         try:
       
   218           # create in/out files
       
   219           temp_dir = mkdtemp(prefix="cm_")
       
   220 
       
   221           # in
       
   222           indir_name = os.path.join(temp_dir, THE_INDIR)
       
   223           os.mkdir(indir_name)
       
   224           infile_name = os.path.join(indir_name, THE_INFILE + '.html')
       
   225 
       
   226           # out
       
   227           outdir_name = os.path.join(temp_dir, THE_OUTDIR)
       
   228           os.mkdir(outdir_name)
       
   229           outfile_name = os.path.join(outdir_name, THE_OUTFILE)
       
   230 
       
   231           # write infile 
       
   232           infile = open(infile_name,'w')
       
   233           if type(input) == unicode:
       
   234             input = input.encode('utf8')
       
   235           infile.write(input)
       
   236           infile.close()
       
   237 
       
   238           # fix perms
       
   239           # TODO group permission should suffice
       
   240           os.chmod(temp_dir, 0755) # read        
       
   241           os.chmod(indir_name, 0755) # read        
       
   242           os.chmod(infile_name, 0755) # read
       
   243           os.chmod(outdir_name, 0777) # read / write
       
   244 
       
   245           # Do the job
       
   246           self.convert_file(infile_name, outfile_name, format)
       
   247 
       
   248           out_f = open(outfile_name,'r')
       
   249           output = out_f.read()
       
   250           return output
       
   251 
       
   252         finally:
       
   253           try:
       
   254             if out_f:
       
   255                 out_f.close()
       
   256             if infile:
       
   257                 infile.close()
       
   258             #top = temp_dir
       
   259             #for root, dirs, files in os.walk(top, topdown=False):
       
   260             #    for name in files:
       
   261             #        os.remove(os.path.join(root, name))
       
   262             #    for name in dirs:
       
   263             #        os.rmdir(os.path.join(root, name))
       
   264             #os.rmdir(top)
       
   265           except:
       
   266             pass
       
   267 
       
   268     def add_html_header(self, body):
       
   269         """ 
       
   270         Add an HTML header to an HTML body
       
   271         """
       
   272 
       
   273         return """
       
   274 <html xmlns="http://www.w3.org/1999/xhtml">
       
   275     <head>
       
   276         <meta http-equiv="content-type" content="text/html; charset=utf-8" />
       
   277     </head>
       
   278     <body>
       
   279         %s
       
   280     </body>
       
   281 </html>
       
   282 """ %body
       
   283