src/cm/converters/old_converters.py
author raph
Tue, 24 Nov 2009 10:31:11 +0100
changeset 5 c3594e4df7c1
parent 0 40c8f766c9b8
permissions -rw-r--r--
lazy strings
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     1
########## converters
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     2
from django.core.cache import cache
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     3
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     4
# adapted [to django] from http://code.activestate.com/recipes/325205/
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     5
def memoize(f):
    """Decorator caching the results of ``f`` in the Django cache.

    The cache entry is looked up by a string key derived from the module and
    name of ``f`` and from the ``repr`` of the call arguments, so arguments
    should have a stable ``repr`` for the cache to be effective.

    Returns a wrapper with the same signature as ``f`` that returns the
    cached value when one exists and otherwise calls ``f`` and stores the
    result with the cache's default timeout.
    """
    import functools
    import hashlib

    # Unique marker used as the ``cache.get`` default so that a cached falsy
    # result (0, '', [], ...) is not mistaken for a cache miss.
    missing = object()

    @functools.wraps(f)
    def g(*args, **kwargs):
        # Cache keys have to be plain strings; the function object itself is
        # left out because its repr embeds a per-process memory address.
        # kwargs are sorted so that the key does not depend on their order.
        signature = repr(
            (f.__module__, f.__name__, args, sorted(kwargs.items())))
        if not isinstance(signature, bytes):
            signature = signature.encode('utf-8')
        key = 'memoize:' + hashlib.md5(signature).hexdigest()

        val = cache.get(key, missing)
        if val is missing:
            val = f(*args, **kwargs)
            cache.set(key, val)
        return val
    return g
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    14
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    15
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    16
def to_unicode(string):
    """Return ``string`` as text, decoding byte strings as UTF-8.

    Byte strings are decoded with the 'utf8' codec; anything else (in
    particular a value that is already text) is returned unchanged.

    Raises UnicodeDecodeError if a byte string is not valid UTF-8.
    """
    # isinstance() is required here: comparing type(string) with the *name*
    # of a type is always unequal, which made text input get decoded again.
    if isinstance(string, bytes):
        return string.decode('utf8')
    return string
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    21
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    22
def to_utf8(string):
    """Return ``string`` as a UTF-8 encoded byte string.

    A value that is already a byte string is returned unchanged; any other
    value is encoded with the 'utf8' codec via its ``encode`` method.
    """
    # isinstance() is required here: comparing type(string) with the *name*
    # of a type is always unequal, which made byte strings get re-encoded.
    if isinstance(string, bytes):
        return string
    return string.encode('utf8')
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    27
 
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    28
#@memoize
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    29
def rst_to_html(rst):
    """Convert reStructuredText source to an HTML fragment.

    Returns the 'stylesheet' part followed by the 'body' part of the
    docutils writer that rendered ``rst``.
    """
    # Only the publisher is needed here; the full-document output that
    # _get_html_and_pub also returns is ignored.
    _, pub = _get_html_and_pub(rst)
    parts = pub.writer.parts
    return parts['stylesheet'] + parts['body']
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    34
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    35
#@memoize
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    36
def rst_to_fullhtml(rst):
    """Convert reStructuredText source to a complete HTML document.

    Returns the output produced by the docutils publisher for ``rst``.
    """
    html, _ = _get_html_and_pub(rst)
    return html
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    41
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    42
def markdown_to_html(markdown):
    """Convert Markdown source to an HTML fragment."""
    fragment = _markdown_to_html(markdown)
    return fragment
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    44
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    45
def markdown_to_fullhtml(markdown):
    """Convert Markdown source to an HTML document.

    The converted fragment is wrapped in bare <html><body> tags.
    """
    opening = '<html><body>'
    closing = '</body></html>'
    return opening + _markdown_to_html(markdown) + closing
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    47
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    48
def _markdown_to_html(markdown):
    """Render Markdown source to HTML with a fresh ``Markdown`` converter."""
    # Imported lazily so the module loads even when the markdown package
    # is not installed.
    from markdown import Markdown

    return Markdown().convert(markdown)
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    53
    
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    54
def _get_html_and_pub(rst):
    """Publish reStructuredText source through docutils.

    Returns a ``(html, pub)`` pair: the output of
    ``docutils.core.publish_programmatically`` and the publisher object it
    used.
    """
    from docutils import core, io

    # Every argument of publish_programmatically is spelled out: string
    # input and output, standalone reader, reStructuredText parser, HTML
    # writer.
    publish_options = dict(
        source_class=io.StringInput,
        source=rst,
        source_path=None,
        destination_class=io.StringOutput,
        destination=None,
        destination_path=None,
        reader=None,
        reader_name='standalone',
        parser=None,
        parser_name='restructuredtext',
        writer=None,
        writer_name='HTML',
        settings=None,
        settings_spec=None,
        settings_overrides=None,
        config_section=None,
        enable_exit_status=None,
    )
    output, publisher = core.publish_programmatically(**publish_options)
    return output, publisher
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    67
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    68
#@memoize
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    69
def html_to_pdf(html):
    """Render an HTML document to PDF with pisa.

    ``html`` is converted to a UTF-8 byte string before rendering.

    Returns the content of the generated PDF, or None when pisa reports an
    error.
    """
    import StringIO
    import sx.pisa3 as pisa

    html = to_utf8(html)
    dst = StringIO.StringIO()
    try:
        result = pisa.CreatePDF(html, dst)
        if result.err:
            return None
        return dst.getvalue()
    finally:
        # Release the buffer on every path, including conversion errors.
        dst.close()
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    82
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    83
# http://www.aaronsw.com/2002/html2text/
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    84
#@memoize    
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    85
def html_to_markdown(html):
    """Convert HTML to Markdown text using the bundled html2text module."""
    # NOTE(review): this module lives under src/cm/ but imports from the
    # ``com`` package -- confirm that the import path is the intended one.
    from com.ext.html2text import html2text as convert_html

    markdown_text = convert_html(html)
    return markdown_text
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    88
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    89
########## formats
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    90
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    91
# Registry of known formats: each entry has a display 'name' and, when
# conversions from that format exist, a 'to_format' mapping from the target
# format key to the converter function.
FORMATS = {
    'HTML': {
        'name': 'HTML',
        'to_format': {
            'Markdown': html_to_markdown,
        },
    },
    'FULLHTML': {
        'name': 'FULLHTML',
        'to_format': {
            'PDF': html_to_pdf,
        },
    },
    'RST': {
        'name': 'reStructuredText',
        'to_format': {
            'HTML': rst_to_html,
            'FULLHTML': rst_to_fullhtml,
        },
    },
    'Markdown': {
        'name': 'Markdown',
        'to_format': {
            'HTML': markdown_to_html,
            'FULLHTML': markdown_to_fullhtml,
        },
    },
    # The two formats below have no outgoing conversions.
    'Textile': {
        'name': 'Textile',
    },
    'PDF': {
        'name': 'PDF',
    },
}
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   114
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   115
# (key, display name) pairs for every registered format.
CHOICES_FORMATS = [
    (k, v.get('name'))
    for k, v in FORMATS.items()
]

# Format keys offered as input formats, and the one used by default.
INPUT_FORMATS = ['RST', 'Markdown']

DEFAULT_INPUT_FORMAT = 'Markdown'

# (key, display name) pairs restricted to the input formats.
CHOICES_INPUT_FORMATS = [
    (k, v.get('name'))
    for k, v in FORMATS.items()
    if k in INPUT_FORMATS
]
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   122
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   123
def get_supported_conversions(from_format):
    """Return the list of format keys that ``from_format`` converts to.

    The list is empty when ``from_format`` is not a key of FORMATS or when
    its entry declares no 'to_format' mapping (e.g. 'PDF', 'Textile').
    """
    conversions = FORMATS.get(from_format, {}).get('to_format', {})
    return list(conversions)
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   125
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   126
def is_supported_conversion(from_format, to_format):
    """Tell whether content can be converted between two formats.

    Returns the converter function registered in FORMATS for the
    ``from_format`` -> ``to_format`` pair (a truthy value), or None when
    either format is unknown or no such conversion is registered.
    """
    # Default to empty mappings so that an unknown ``from_format`` or an
    # entry without 'to_format' yields None instead of raising.
    infos = FORMATS.get(from_format) or {}
    converters = infos.get('to_format') or {}
    return converters.get(to_format)
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   129
 
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   130
def convert(content, from_format, to_format):
    """Convert ``content`` from one format to another.

    Returns the result of the converter registered in FORMATS for the
    ``from_format`` -> ``to_format`` pair, or None when no such conversion
    is registered (including when ``from_format`` is unknown).
    """
    # Single lookup of the converter; empty defaults keep unknown formats
    # from raising.
    infos = FORMATS.get(from_format) or {}
    conv_fun = (infos.get('to_format') or {}).get(to_format)
    if not conv_fun:
        return None
    return conv_fun(content)