src/cm/views/export.py
author gibus
Sat, 14 Sep 2013 10:00:45 +0200
changeset 533 c7aed0340a00
parent 495 f86b611f1876
permissions -rw-r--r--
For some reason, when exporting commented text with background colors, angle brackets for html spans are sometimes returned as entities by soup.

from django import forms
from django.core.urlresolvers import reverse
from django.http import HttpResponse, HttpResponseRedirect, Http404
from django.shortcuts import render_to_response
from django.template.loader import render_to_string
from django.template import RequestContext
from django.utils.translation import ugettext as _, ugettext_lazy
from django.contrib.auth.models import User
from django.conf import settings
from cm.converters.pandoc_converters import pandoc_convert, do_tidy
from cm.models import Text, TextVersion, Attachment, Comment
from cm.security import get_viewable_comments
import mimetypes
import simplejson
import imghdr
import base64
import re
from cm.cm_settings import USE_ABI

EXPORT2_INFOS = {
# key -> { mimetype, extension}
's5' :   {},
'pdf' :  {'mimetype': 'application/pdf', 'extension':'pdf'},
'markdown' :  {'mimetype': 'text/plain', 'extension':'mkd'},
'odt' :  {'mimetype': 'application/vnd.oasis.opendocument.text', 'extension':'odt'},
'doc' :  {'mimetype': 'application/msword', 'extension':'doc'},
'docx' :  {'mimetype': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', 'extension':'docx'},
'latex' :{'mimetype': 'text/x-tex', 'extension':'tex'},
'html' :{'mimetype': 'text/html', 'extension':'html'},
'epub' :{'mimetype': 'application/epub+zip', 'extension':'epub'},
'raw' : {'mimetype': 'text/plain', 'extension':'txt'},
'xml' : {'mimetype': 'text/xml', 'extension':'xml'},
}

HTML_HEADER = u"""
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html><head>
<STYLE TYPE='text/css'>
div.pagebreakhere {
    page-break-before: always ;
}
</STYLE>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/></head>
<body>%s</body>
</html>
"""
 
def content_export2(request, content, title, content_format, format, use_pandoc, download_response):
    # TODO : formats must be imported from converters
#    import pdb;pdb.set_trace()
    if format == 'raw' :
        export_content = content
    elif content_format == 'html' and format == 'html':
        export_content = HTML_HEADER % content
    elif content_format == 'markdown' and format == 'markdown':
        export_content = content
    else:
        if use_pandoc :
          # markdown2pdf is buggy => convert to HTML and use abiword to export in PDF
          if format in ('pdf', 'odt', 'docx', 'doc') and USE_ABI:
            html_content = pandoc_convert(content, content_format, 'html', full=True)
            from cm.converters.abi_converters import AbiFileConverter
            converter = AbiFileConverter()
            fix_content = converter.add_html_header(html_content)
            export_content = converter.convert_from_html(fix_content, format)
          else:
            export_content = pandoc_convert(content, content_format, format, full=True)
        else :
            fix_content = content
            if content_format == 'html':
                if USE_ABI:
                  from cm.converters.abi_converters import AbiFileConverter
                  converter = AbiFileConverter()
                  fix_content = converter.add_html_header(content)
                else:
                  from cm.converters.oo_converters import combine_css_body                
                  fix_content = combine_css_body(content, '')
            if USE_ABI:
              from cm.converters.abi_converters import AbiFileConverter
              converter = AbiFileConverter()
              try:
                export_content = converter.convert_from_html(fix_content, format)
              except:
                if content_format == 'html':
                  from cm.converters.oo_converters import combine_css_body                
                  fix_content = combine_css_body(content, '')
                from cm.converters.oo_converters import convert_html as oo_convert                
                export_content = oo_convert(fix_content, format)
            else:
              from cm.converters.oo_converters import convert_html as oo_convert                
              export_content = oo_convert(fix_content, format)
    
    export_infos = EXPORT2_INFOS[format]
     
    if download_response:
        return _response_download(export_content, title, export_infos['mimetype'], export_infos['extension']) ;
    else:
        return _response_write(export_content)    
    

def _response_download(content, title, mimetype, extension):
    response = HttpResponse(mimetype=mimetype)
    file_title = title + '.' + extension
    from email.header import Header
    encoded_name = str(Header(file_title.encode('utf8'), charset='utf8', maxlinelen=500))
    # TODO: find a way to include long (more than 76 chars) into header
    encoded_name = encoded_name.replace('\n','')
    response['Content-Disposition'] = 'attachment; filename=%s' % encoded_name
    response.write(content)
    return response        

def _response_write(content):
    response = HttpResponse()
    response.write(content)
    return response

def xml_export(request, text_version, whichcomments):
  # Text version infos
  template_dict = { 'title': text_version.title, 'created': text_version.created, 'modified': text_version.modified, 'format': text_version.format, 'content': text_version.content, 'tags': text_version.tags, }
  
  # Comments
  comments = [] # whichcomments=="none"
  if whichcomments == "filtered" or whichcomments == "all":
    _comments = text_version.comment_set.all()
    if whichcomments == "filtered" :
      filteredIds = []
      if request.method == 'POST' :
        ll = request.POST.get('filteredIds',[]).split(",")
        filteredIds = [ int(l) for l in ll if l]
      _comments = text_version.comment_set.filter(id__in=filteredIds)
    comments = get_viewable_comments(request, _comments, text_version, order_by=('start_wrapper','start_offset','end_wrapper','end_offset'))
    # Add user name/email if missing comment name/email
    for comment in comments:
      users = User.objects.filter(id=comment.user_id)
      if not(comment.name):
        comment.name = users[0].username
      if not(comment.email):
        comment.email = users[0].email
      
    template_dict['comments'] = comments

  # Author
  users = User.objects.filter(id=text_version.user_id)
  if text_version.name:
    template_dict['name'] = text_version.name
  else:
    template_dict['name'] = users[0].username
  if text_version.email:
    template_dict['email'] = text_version.email
  else:
    template_dict['email'] = users[0].email

  # Attachments
  attachments = []
  template_dict['content'] = re.sub("%s" %settings.SITE_URL, '', template_dict['content']) # replaces absolute urls by relative urls
  attach_re = r'(?:/text/(?P<key>\w*))?/attach/(?P<attach_key>\w*)/'
  attach_str_textversion = r'/text/%s/attach/%s/'
  attach_str = r'/attach/%s/'
  for match in re.findall(attach_re, template_dict['content']):
    if match[0]: # removes text_version, attachements do not need it
      template_dict['content'] = template_dict['content'].replace(attach_str_textversion %match, attach_str %match[1])

    attach = Attachment.objects.get(key=match[1])
    img_fmt = imghdr.what(attach.data.path)
    img = open(attach.data.path, 'rb')
    attachments.append({'key': match[1], 'data': base64.b64encode(img.read())})
    img.close()
  template_dict['attachments'] = attachments

  # Renders template
  export_content = render_to_string('site/export.xml', template_dict, context_instance=RequestContext(request))

  # Returns HTTP response
  export_infos = EXPORT2_INFOS['xml']
  return _response_download(export_content, text_version.title, export_infos['mimetype'], export_infos['extension']) ;