src/cm/views/export.py
author gibus
Fri, 10 Aug 2012 16:12:29 +0200
changeset 460 2fdb7d095d5c
parent 455 33c7e20efcb7
child 494 6b3d72136e85
permissions -rw-r--r--
Added import from XML file, including text, comments and attachments.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     1
from django import forms
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     2
from django.core.urlresolvers import reverse
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     3
from django.http import HttpResponse, HttpResponseRedirect, Http404
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     4
from django.shortcuts import render_to_response
453
1d314f629611 Added export to XML for re-import (nb. without attachements).
gibus
parents: 443
diff changeset
     5
from django.template.loader import render_to_string
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     6
from django.template import RequestContext
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     7
from django.utils.translation import ugettext as _, ugettext_lazy
453
1d314f629611 Added export to XML for re-import (nb. without attachements).
gibus
parents: 443
diff changeset
     8
from django.contrib.auth.models import User
455
33c7e20efcb7 Added export of attachements as inline b64 images for appropriate formats.
gibus
parents: 454
diff changeset
     9
from django.conf import settings
364
41dd28557b5d tidyfy html before conversion with abiword
gibus
parents: 363
diff changeset
    10
from cm.converters.pandoc_converters import pandoc_convert, do_tidy
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    11
from cm.models import Text, TextVersion, Attachment, Comment
453
1d314f629611 Added export to XML for re-import (nb. without attachements).
gibus
parents: 443
diff changeset
    12
from cm.security import get_viewable_comments
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    13
import mimetypes
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    14
import simplejson
455
33c7e20efcb7 Added export of attachements as inline b64 images for appropriate formats.
gibus
parents: 454
diff changeset
    15
import imghdr
33c7e20efcb7 Added export of attachements as inline b64 images for appropriate formats.
gibus
parents: 454
diff changeset
    16
import base64
33c7e20efcb7 Added export of attachements as inline b64 images for appropriate formats.
gibus
parents: 454
diff changeset
    17
import re
360
bfaab8740995 Add abiword as an alternative to open office for conversions
gibus
parents: 77
diff changeset
    18
from cm.cm_settings import USE_ABI
454
b7a092a52eae Cleaned export.
gibus
parents: 453
diff changeset
    19
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    20
# Download metadata per export format: format key -> {'mimetype', 'extension'}.
# The 's5' entry is deliberately empty: it carries no mimetype/extension.
EXPORT2_INFOS = {
    's5': {},
    'pdf': dict(mimetype='application/pdf', extension='pdf'),
    'markdown': dict(mimetype='text/plain', extension='mkd'),
    'odt': dict(mimetype='application/vnd.oasis.opendocument.text', extension='odt'),
    'doc': dict(mimetype='application/msword', extension='doc'),
    'docx': dict(
        mimetype='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        extension='docx',
    ),
    'latex': dict(mimetype='text/x-tex', extension='tex'),
    'html': dict(mimetype='text/html', extension='html'),
    'epub': dict(mimetype='application/epub+zip', extension='epub'),
    'raw': dict(mimetype='text/plain', extension='txt'),
    'xml': dict(mimetype='text/xml', extension='xml'),
}
454
b7a092a52eae Cleaned export.
gibus
parents: 453
diff changeset
    34
b7a092a52eae Cleaned export.
gibus
parents: 453
diff changeset
    35
# Minimal HTML page wrapped around an HTML body on export; the single '%s'
# placeholder receives the body markup.  The value starts and ends with a
# newline.
HTML_HEADER = (
    u"\n"
    u'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">\n'
    u"<html><head>\n"
    u"<STYLE TYPE='text/css'>\n"
    u"div.pagebreakhere {\n"
    u"    page-break-before: always ;\n"
    u"}\n"
    u"</STYLE>\n"
    u'<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/></head>\n'
    u"<body>%s</body>\n"
    u"</html>\n"
)
b7a092a52eae Cleaned export.
gibus
parents: 453
diff changeset
    47
 
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    48
def content_export2(request, content, title, content_format, format, use_pandoc, download_response):
    """Convert ``content`` to ``format`` and return it in an HTTP response.

    Parameters:
      request           -- accepted for the view signature; not read here.
      content           -- source document text.
      title             -- document title, used to name the download.
      content_format    -- format ``content`` is written in (e.g. 'html',
                           'markdown').
      format            -- target format; must be a key of EXPORT2_INFOS
                           (a KeyError is raised otherwise, after conversion).
      use_pandoc        -- when true, convert through pandoc; otherwise go
                           through the abiword / open office converters.
      download_response -- when true, return an attachment download;
                           otherwise write the converted content into a
                           plain response.
    """
    # TODO : formats must be imported from converters
    same_markdown = content_format == 'markdown' and format == 'markdown'
    if format == 'raw' or same_markdown:
        # Nothing to convert: hand the source back untouched.
        export_content = content
    elif content_format == 'html' and format == 'html':
        # HTML to HTML: only wrap the body into a full page.
        export_content = HTML_HEADER % content
    elif use_pandoc:
        if format in ('pdf', 'odt', 'docx', 'doc') and USE_ABI:
            # markdown2pdf is buggy => convert to HTML and use abiword to export in PDF
            html_content = pandoc_convert(content, content_format, 'html', full=True)
            from cm.converters.abi_converters import AbiFileConverter
            abi = AbiFileConverter()
            export_content = abi.convert_from_html(abi.add_html_header(html_content), format)
        else:
            export_content = pandoc_convert(content, content_format, format, full=True)
    else:
        # Without pandoc the converters are fed HTML; only HTML sources get
        # a header/CSS wrapper first, other sources are passed as they are.
        html_input = content
        if content_format == 'html':
            if USE_ABI:
                from cm.converters.abi_converters import AbiFileConverter
                html_input = AbiFileConverter().add_html_header(content)
            else:
                from cm.converters.oo_converters import combine_css_body
                html_input = combine_css_body(content, '')
        if USE_ABI:
            from cm.converters.abi_converters import AbiFileConverter
            export_content = AbiFileConverter().convert_from_html(html_input, format)
        else:
            from cm.converters.oo_converters import convert_html as oo_convert
            export_content = oo_convert(html_input, format)

    export_infos = EXPORT2_INFOS[format]

    if not download_response:
        return _response_write(export_content)
    return _response_download(export_content, title, export_infos['mimetype'], export_infos['extension'])
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    92
    
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    93
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    94
def _response_download(content, title, mimetype, extension):
    """Return an HttpResponse that delivers ``content`` as a file download.

    The file name is ``title`` + '.' + ``extension``; it is UTF-8 encoded and
    passed through ``email.header.Header`` before being placed in the
    Content-Disposition header.  ``mimetype`` is given to the response as is.
    """
    from email.header import Header

    filename = title + '.' + extension
    header_value = str(Header(filename.encode('utf8'), charset='utf8', maxlinelen=500))
    # TODO: find a way to include long (more than 76 chars) into header
    # Newlines are stripped from the encoded name before it goes into the
    # header value.
    header_value = header_value.replace('\n', '')

    download = HttpResponse(mimetype=mimetype)
    download['Content-Disposition'] = 'attachment; filename=%s' % header_value
    download.write(content)
    return download
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   104
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   105
def _response_write(content):
    """Return a default HttpResponse with ``content`` written into its body."""
    plain = HttpResponse()
    plain.write(content)
    return plain
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   109
453
1d314f629611 Added export to XML for re-import (nb. without attachements).
gibus
parents: 443
diff changeset
   110
def _xml_export_filtered_ids(request):
    """Return the comment ids listed in the POSTed 'filteredIds' field."""
    if request.method != 'POST':
        return []
    # Default to an empty string: the value is split below, and a list
    # default would fail when the field is absent.
    raw_ids = request.POST.get('filteredIds', '')
    ids = []
    for token in raw_ids.split(","):
        try:
            ids.append(int(token))
        except ValueError:
            # Empty chunk (e.g. trailing comma) or non numeric value: ignore.
            continue
    return ids


def _xml_export_user(user_id):
    """Return the User with this id, or None when there is no such user."""
    if user_id is None:
        return None
    found = list(User.objects.filter(id=user_id)[:1])
    if found:
        return found[0]
    return None


def _xml_export_attachments(content):
    """Return (content, attachments) for the attachments linked in ``content``.

    Links of the form /text/<key>/attach/<attach_key>/ are rewritten to
    /attach/<attach_key>/.  ``attachments`` holds one
    {'key': ..., 'data': <base64 of the file>} dict per distinct attachment
    key, in order of first appearance.
    """
    attach_re = r'(?:/text/(?P<key>\w*))?/attach/(?P<attach_key>\w*)/'
    attachments = []
    seen_keys = set()
    for text_key, attach_key in re.findall(attach_re, content):
        if text_key:  # removes text_version, attachments do not need it
            content = content.replace(
                r'/text/%s/attach/%s/' % (text_key, attach_key),
                r'/attach/%s/' % attach_key)
        if attach_key in seen_keys:
            # Same attachment linked several times: export its data once.
            continue
        seen_keys.add(attach_key)
        # .get() raises if no attachment has this key; left to propagate.
        attach = Attachment.objects.get(key=attach_key)
        with open(attach.data.path, 'rb') as data_file:
            encoded = base64.b64encode(data_file.read())
        attachments.append({'key': attach_key, 'data': encoded})
    return content, attachments


def xml_export(request, text_version, whichcomments):
    """Export a text version, its comments and attachments as an XML download.

    Parameters:
      request       -- current request; in "filtered" mode the comment ids are
                       read from its POST field 'filteredIds' (comma separated).
      text_version  -- the text version to export.
      whichcomments -- "all", "filtered" or anything else (e.g. "none") to
                       export no comment.

    Returns the response built by _response_download from the rendered
    'site/export.xml' template, named after the text version title.

    Missing comment / text version names and emails are filled in from the
    related user when one exists; otherwise they are left as they are.
    """
    # Text version infos
    template_dict = {
        'title': text_version.title,
        'created': text_version.created,
        'modified': text_version.modified,
        'format': text_version.format,
        'content': text_version.content,
        'tags': text_version.tags,
    }

    # Comments
    if whichcomments in ("filtered", "all"):
        if whichcomments == "filtered":
            _comments = text_version.comment_set.filter(id__in=_xml_export_filtered_ids(request))
        else:
            _comments = text_version.comment_set.all()
        # presumably restricts the set to what the requester may see -- see cm.security
        comments = get_viewable_comments(request, _comments, text_version, order_by=('start_wrapper','start_offset','end_wrapper','end_offset'))
        # Add user name/email if missing comment name/email
        for comment in comments:
            if comment.name and comment.email:
                continue  # nothing to complete, skip the user lookup
            commenter = _xml_export_user(comment.user_id)
            if commenter is None:
                continue
            if not comment.name:
                comment.name = commenter.username
            if not comment.email:
                comment.email = commenter.email
        template_dict['comments'] = comments

    # Author
    name = text_version.name
    email = text_version.email
    if not (name and email):
        author = _xml_export_user(text_version.user_id)
        if author is not None:
            name = name or author.username
            email = email or author.email
    template_dict['name'] = name
    template_dict['email'] = email

    # Attachments
    # Replaces absolute urls by relative urls.  A plain string replacement is
    # used so that characters of SITE_URL such as '.' are taken literally.
    relative_content = template_dict['content'].replace(settings.SITE_URL, '')
    template_dict['content'], template_dict['attachments'] = _xml_export_attachments(relative_content)

    # Renders template
    export_content = render_to_string('site/export.xml', template_dict, context_instance=RequestContext(request))

    # Returns HTTP response
    export_infos = EXPORT2_INFOS['xml']
    return _response_download(export_content, text_version.title, export_infos['mimetype'], export_infos['extension'])