35 pos = len(lines[-1]) |
34 pos = len(lines[-1]) |
36 yield word |
35 yield word |
37 return u''.join(_generator()) |
36 return u''.join(_generator()) |
38 wrap = allow_lazy(wrap, unicode) |
37 wrap = allow_lazy(wrap, unicode) |
39 |
38 |
40 def truncate_words(s, num): |
39 def truncate_words(s, num, end_text='...'): |
41 "Truncates a string after a certain number of words." |
40 """Truncates a string after a certain number of words. Takes an optional |
|
41 argument of what should be used to notify that the string has been |
|
42 truncated, defaults to ellipsis (...)""" |
42 s = force_unicode(s) |
43 s = force_unicode(s) |
43 length = int(num) |
44 length = int(num) |
44 words = s.split() |
45 words = s.split() |
45 if len(words) > length: |
46 if len(words) > length: |
46 words = words[:length] |
47 words = words[:length] |
47 if not words[-1].endswith('...'): |
48 if not words[-1].endswith(end_text): |
48 words.append('...') |
49 words.append(end_text) |
49 return u' '.join(words) |
50 return u' '.join(words) |
50 truncate_words = allow_lazy(truncate_words, unicode) |
51 truncate_words = allow_lazy(truncate_words, unicode) |
51 |
52 |
52 def truncate_html_words(s, num): |
53 def truncate_html_words(s, num, end_text='...'): |
53 """ |
54 """Truncates html to a certain number of words (not counting tags and |
54 Truncates html to a certain number of words (not counting tags and |
|
55 comments). Closes opened tags if they were correctly closed in the given |
55 comments). Closes opened tags if they were correctly closed in the given |
56 html. |
56 html. Takes an optional argument of what should be used to notify that the |
57 """ |
57 string has been truncated, defaults to ellipsis (...).""" |
58 s = force_unicode(s) |
58 s = force_unicode(s) |
59 length = int(num) |
59 length = int(num) |
60 if length <= 0: |
60 if length <= 0: |
61 return u'' |
61 return u'' |
62 html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input') |
62 html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input') |
63 # Set up regular expressions |
63 # Set up regular expressions |
64 re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U) |
64 re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U) |
65 re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>') |
65 re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>') |
66 # Count non-HTML words and keep note of open tags |
66 # Count non-HTML words and keep note of open tags |
67 pos = 0 |
67 pos = 0 |
68 ellipsis_pos = 0 |
68 end_text_pos = 0 |
69 words = 0 |
69 words = 0 |
70 open_tags = [] |
70 open_tags = [] |
71 while words <= length: |
71 while words <= length: |
72 m = re_words.search(s, pos) |
72 m = re_words.search(s, pos) |
73 if not m: |
73 if not m: |
76 pos = m.end(0) |
76 pos = m.end(0) |
77 if m.group(1): |
77 if m.group(1): |
78 # It's an actual non-HTML word |
78 # It's an actual non-HTML word |
79 words += 1 |
79 words += 1 |
80 if words == length: |
80 if words == length: |
81 ellipsis_pos = pos |
81 end_text_pos = pos |
82 continue |
82 continue |
83 # Check for tag |
83 # Check for tag |
84 tag = re_tag.match(m.group(0)) |
84 tag = re_tag.match(m.group(0)) |
85 if not tag or ellipsis_pos: |
85 if not tag or end_text_pos: |
86 # Don't worry about non tags or tags after our truncate point |
86 # Don't worry about non tags or tags after our truncate point |
87 continue |
87 continue |
88 closing_tag, tagname, self_closing = tag.groups() |
88 closing_tag, tagname, self_closing = tag.groups() |
89 tagname = tagname.lower() # Element names are always case-insensitive |
89 tagname = tagname.lower() # Element names are always case-insensitive |
90 if self_closing or tagname in html4_singlets: |
90 if self_closing or tagname in html4_singlets: |
102 # Add it to the start of the open tags list |
102 # Add it to the start of the open tags list |
103 open_tags.insert(0, tagname) |
103 open_tags.insert(0, tagname) |
104 if words <= length: |
104 if words <= length: |
105 # Don't try to close tags if we don't need to truncate |
105 # Don't try to close tags if we don't need to truncate |
106 return s |
106 return s |
107 out = s[:ellipsis_pos] + ' ...' |
107 out = s[:end_text_pos] |
|
108 if end_text: |
|
109 out += ' ' + end_text |
108 # Close any tags still open |
110 # Close any tags still open |
109 for tag in open_tags: |
111 for tag in open_tags: |
110 out += '</%s>' % tag |
112 out += '</%s>' % tag |
111 # Return string |
113 # Return string |
112 return out |
114 return out |
155 return text |
157 return text |
156 recapitalize = allow_lazy(recapitalize) |
158 recapitalize = allow_lazy(recapitalize) |
157 |
159 |
158 def phone2numeric(phone): |
160 def phone2numeric(phone): |
159 "Converts a phone number with letters into its numeric equivalent." |
161 "Converts a phone number with letters into its numeric equivalent." |
160 letters = re.compile(r'[A-PR-Y]', re.I) |
162 letters = re.compile(r'[A-Z]', re.I) |
161 char2number = lambda m: {'a': '2', 'c': '2', 'b': '2', 'e': '3', |
163 char2number = lambda m: {'a': '2', 'b': '2', 'c': '2', 'd': '3', 'e': '3', |
162 'd': '3', 'g': '4', 'f': '3', 'i': '4', 'h': '4', 'k': '5', |
164 'f': '3', 'g': '4', 'h': '4', 'i': '4', 'j': '5', 'k': '5', 'l': '5', |
163 'j': '5', 'm': '6', 'l': '5', 'o': '6', 'n': '6', 'p': '7', |
165 'm': '6', 'n': '6', 'o': '6', 'p': '7', 'q': '7', 'r': '7', 's': '7', |
164 's': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8', |
166 't': '8', 'u': '8', 'v': '8', 'w': '9', 'x': '9', 'y': '9', 'z': '9', |
165 'y': '9', 'x': '9'}.get(m.group(0).lower()) |
167 }.get(m.group(0).lower()) |
166 return letters.sub(char2number, phone) |
168 return letters.sub(char2number, phone) |
167 phone2numeric = allow_lazy(phone2numeric) |
169 phone2numeric = allow_lazy(phone2numeric) |
168 |
170 |
169 # From http://www.xhaus.com/alan/python/httpcomp.html#gzip |
171 # From http://www.xhaus.com/alan/python/httpcomp.html#gzip |
170 # Used with permission. |
172 # Used with permission. |
184 return r"\u%04x" % ord(match.group(1)) |
186 return r"\u%04x" % ord(match.group(1)) |
185 |
187 |
186 if type(s) == str: |
188 if type(s) == str: |
187 s = s.decode('utf-8') |
189 s = s.decode('utf-8') |
188 elif type(s) != unicode: |
190 elif type(s) != unicode: |
189 raise TypeError, s |
191 raise TypeError(s) |
190 s = s.replace('\\', '\\\\') |
192 s = s.replace('\\', '\\\\') |
191 s = s.replace('\r', '\\r') |
193 s = s.replace('\r', '\\r') |
192 s = s.replace('\n', '\\n') |
194 s = s.replace('\n', '\\n') |
193 s = s.replace('\t', '\\t') |
195 s = s.replace('\t', '\\t') |
194 s = s.replace("'", "\\'") |
196 s = s.replace("'", "\\'") |
198 javascript_quote = allow_lazy(javascript_quote, unicode) |
200 javascript_quote = allow_lazy(javascript_quote, unicode) |
199 |
201 |
200 # Expression to match some_token and some_token="with spaces" (and similarly |
202 # Expression to match some_token and some_token="with spaces" (and similarly |
201 # for single-quoted strings). |
203 # for single-quoted strings). |
202 smart_split_re = re.compile(r""" |
204 smart_split_re = re.compile(r""" |
203 ([^\s"]*"(?:[^"\\]*(?:\\.[^"\\]*)*)"\S*| |
205 ((?: |
204 [^\s']*'(?:[^'\\]*(?:\\.[^'\\]*)*)'\S*| |
206 [^\s'"]* |
205 \S+)""", re.VERBOSE) |
207 (?: |
|
208 (?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*') |
|
209 [^\s'"]* |
|
210 )+ |
|
211 ) | \S+) |
|
212 """, re.VERBOSE) |
206 |
213 |
207 def smart_split(text): |
214 def smart_split(text): |
208 r""" |
215 r""" |
209 Generator that splits a string by spaces, leaving quoted phrases together. |
216 Generator that splits a string by spaces, leaving quoted phrases together. |
210 Supports both single and double quotes, and supports escaping quotes with |
217 Supports both single and double quotes, and supports escaping quotes with |