|
1 # -*- coding: utf-8 -*- |
|
2 from django.test import TestCase |
|
3 from cm.utils.spannifier import * |
|
4 from BeautifulSoup import BeautifulSoup |
|
5 |
|
# Run the whole test suite:
#     python manage.py test
#
# Run only this test case:
#     python manage.py test cm.SpannifyTest
|
9 |
|
# Names of HTML fixture files. In the visible code only the commented-out
# file-based loop in SpannifyTest.test_spannify refers to this list.
file_tests = [
    "simple.html",
]
|
11 |
|
class SpannifyTest(TestCase):
    """Check ``spannify`` against inline HTML fixtures."""

    def test_spannify(self):
        """Compare the output of ``spannify`` with the expected fixtures.

        Each fixture is a triple ``(source_html, expected_spanned,
        expected_raw)``.  ``spannify`` is called on ``source_html`` and its
        first two return values, converted with ``unicode()``, must equal
        ``expected_spanned`` and ``expected_raw`` respectively.
        """
        string_tests_spannify = [
            # Single text node containing non-ASCII characters.
            (
                u"""<body>kéké</body>""",
                u"""<body><span id="sv_0" class="c-s"><span id="sv-0" class="c-count-0 c-c">kéké</span></span></body>""",
                u'kéké',
            ),
            # Text nodes mixed with inline elements: the expected markup
            # wraps each text run in its own numbered pair of spans.
            (
                u"""<body>aaa <span>bbb</span> ccc ddd <b>eee</b></body>""",
                u"""<body><span id="sv_0" class="c-s"><span id="sv-0" class="c-count-0 c-c">aaa </span></span><span><span id="sv_1" class="c-s"><span id="sv-1" class="c-count-0 c-c">bbb</span></span></span><span id="sv_2" class="c-s"><span id="sv-2" class="c-count-0 c-c"> ccc ddd </span></span><b><span id="sv_3" class="c-s"><span id="sv-3" class="c-count-0 c-c">eee</span></span></b></body>""",
                u"""aaa bbb ccc ddd eee""",
            ),
        ]

        # ``source_html`` rather than ``input`` so the builtin is not shadowed.
        for source_html, expected_spanned, expected_raw in string_tests_spannify:
            # The third return value is not checked by this test.
            res, raw_text, _unused = spannify(source_html)
            self.assertEqual(unicode(res), expected_spanned)
            self.assertEqual(unicode(raw_text), expected_raw)

        # NOTE(review): disabled file-based variant, kept for reference.
        # It refers to names (``xml.dom.minidom``, ``spannifier``) that this
        # module does not visibly import -- confirm before re-enabling.
        #
        # for filename in file_tests :
        #     if filename[:5] == "span_" :
        #         doc = xml.dom.minidom.parse('cm/tests/data/%s' % filename)
        #         soup = BeautifulSoup('cm/tests/data/%s' % filename, convertEntities=["xml", "html"])
        #
        #         res = spannifier.spannify(doc)
        #         res2 = spannifier.spannify_new(soup)
        #
        #         expectedResult = file('cm/tests/data/res_%s' % filename).read()
        ##         print res
        #         self.assertEqual(res2,expectedResult)
|
43 |
|
44 |