|
0
|
1 |
# -*- coding: utf-8 -*- |
|
|
2 |
from django.test import TestCase |
|
|
3 |
from cm.utils.spannifier import * |
|
|
4 |
from BeautifulSoup import BeautifulSoup |
|
|
5 |
|
|
|
6 |
# Run the whole test suite:
#   python manage.py test
#
# Run only this test case:
#   python manage.py test cm.SpannifyTest
|
|
9 |
|
|
|
10 |
# Names of HTML fixture files intended for file-based spannify tests.
# No active code in the visible part of this module reads this list.
file_tests = ["simple.html"]
|
|
11 |
|
|
|
12 |
class SpannifyTest(TestCase): |
|
|
13 |
|
|
|
14 |
def test_spannify(self):
    """Check spannify() against a few inline HTML snippets.

    Each case is a triple: (source HTML, expected spanned markup,
    expected raw text). The first two values returned by spannify() are
    converted with unicode() and compared to the expectations; the third
    returned value is not checked.
    """
    cases = [
        (
            u"""<body>kéké</body>""",
            u"""<body><span id="sv_0" class="c-s"><span id="sv-0" class="c-count-0 c-c">kéké</span></span></body>""",
            u'kéké',
        ),
        (
            u"""<body>aaa <span>bbb</span> ccc ddd <b>eee</b></body>""",
            u"""<body><span id="sv_0" class="c-s"><span id="sv-0" class="c-count-0 c-c">aaa </span></span><span><span id="sv_1" class="c-s"><span id="sv-1" class="c-count-0 c-c">bbb</span></span></span><span id="sv_2" class="c-s"><span id="sv-2" class="c-count-0 c-c"> ccc ddd </span></span><b><span id="sv_3" class="c-s"><span id="sv-3" class="c-count-0 c-c">eee</span></span></b></body>""",
            u"""aaa bbb ccc ddd eee""",
        ),
    ]

    # Named `html_input` (not `input`) so the builtin is not shadowed.
    for html_input, expected_spanned, expected_raw in cases:
        res, raw_text, _corresp = spannify(html_input)
        self.assertEqual(unicode(res), expected_spanned)
        self.assertEqual(unicode(raw_text), expected_raw)

    # TODO: file-based cases are not wired up. A disabled draft iterated
    # the module-level `file_tests`, loaded fixtures from cm/tests/data/
    # and compared the spannified output with the matching
    # cm/tests/data/res_<filename> file.
|
|
43 |
|
|
|
44 |
|
|
271
|
45 |
|
|
|
46 |
def test_long_spannify(self):
    """Run spannify() on a long HTML fixture read from disk.

    The lines visible here make no assertion on the result: the test
    fails only if reading the fixture or calling spannify() raises.
    """
    # NOTE(review): the path is relative to the current working
    # directory, so this presumably has to be run from the directory
    # that contains `src/` -- confirm against how the suite is launched.
    fixture = open('src/cm/tests/data/long_text_to_spannify.html')
    try:
        # The file is read as bytes and decoded explicitly as UTF-8.
        content = unicode(fixture.read(), 'utf8')
    finally:
        # Always release the file handle, even if reading/decoding fails.
        fixture.close()

    res, raw_text, corresp = spannify(content)