|
1 """Translation helper functions.""" |
|
2 |
|
3 import locale |
|
4 import os |
|
5 import re |
|
6 import sys |
|
7 import gettext as gettext_module |
|
8 from cStringIO import StringIO |
|
9 |
|
10 from django.utils.importlib import import_module |
|
11 from django.utils.safestring import mark_safe, SafeData |
|
12 from django.utils.thread_support import currentThread |
|
13 |
|
# Translation objects are cached in a dictionary keyed by language code.
# The active translation of each thread is stored under the value returned by
# currentThread(), which makes the active translation thread local.
_translations = {}
_active = {}

# The default translation is based on the settings file. It is created lazily
# the first time it is needed.
_default = None

# This is a cache for normalized accept-header languages to prevent multiple
# file lookups when checking the same locale on repeated requests.
_accepted = {}

# Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9.
# The dot in the "1.000" alternative is escaped so that only a literal "." is
# accepted after the leading "1"; an unescaped dot would let q-values such as
# "1x" through.
accept_language_re = re.compile(r'''
        ([A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*|\*)   # "en", "en-au", "x-y-z", "*"
        (?:;q=(0(?:\.\d{,3})?|1(?:\.0{,3})?))?   # Optional "q=1.00", "q=0.8"
        (?:\s*,\s*|$)                            # Multiple accepts per header.
        ''', re.VERBOSE)
|
32 |
|
def to_locale(language, to_lower=False):
    """
    Converts a language name such as "en-us" into a locale name such as
    "en_US". Only the first hyphen is treated as the separator. If 'to_lower'
    is True, the part after the separator is lower-cased too ("en_us"). A name
    without a hyphen is simply lower-cased.
    """
    dash = language.find('-')
    if dash < 0:
        return language.lower()
    lang_part = language[:dash].lower()
    country_part = language[dash+1:]
    if to_lower:
        country_part = country_part.lower()
    else:
        country_part = country_part.upper()
    return lang_part + '_' + country_part
|
46 |
|
def to_language(locale):
    """
    Converts a locale name such as "en_US" into a language name such as
    "en-us". Only the first underscore is replaced by a hyphen.
    """
    # Splitting at most once yields one piece (no underscore) or two pieces.
    pieces = locale.split('_', 1)
    return '-'.join([piece.lower() for piece in pieces])
|
54 |
|
class DjangoTranslation(gettext_module.GNUTranslations):
    """
    GNUTranslations subclass that produces UTF-8 output and remembers the
    language it was loaded for.

    On Python 2.4 and later the output charset is set through
    set_output_charset(). Python 2.3 lacks that method, so
    DjangoTranslation23 has to be used there instead.
    """
    def __init__(self, *args, **kw):
        gettext_module.GNUTranslations.__init__(self, *args, **kw)
        # Starting with Python 2.4, there's a function to define the output
        # charset. Where it is missing, the output charset is identical with
        # the translation file charset, so the failure is ignored on purpose.
        try:
            self.set_output_charset('utf-8')
        except AttributeError:
            pass
        self.django_output_charset = 'utf-8'
        # Placeholder until set_language() is called.
        self.__language = '??'

    def merge(self, other):
        """
        Copies all messages of 'other' into this catalog. Messages that exist
        in both catalogs take the value from 'other'.
        """
        self._catalog.update(other._catalog)

    def set_language(self, language):
        """Records the language name this translation object stands for."""
        self.__language = language

    def language(self):
        """Returns the language name recorded by set_language()."""
        return self.__language

    def __repr__(self):
        return "<DjangoTranslation lang:%s>" % self.__language
|
85 |
|
class DjangoTranslation23(DjangoTranslation):
    """
    Compatibility class that is only used with Python 2.3.

    Python 2.3 translation objects have no set_output_charset(), so the
    byte-string methods are overridden here: each one takes the unicode
    result and encodes it to the configured output charset.

    With a full switch to Python 2.4, this can be removed from the source.
    """
    def gettext(self, msgid):
        return self.ugettext(msgid).encode(self.django_output_charset)

    def ngettext(self, msgid1, msgid2, n):
        return self.ungettext(msgid1, msgid2, n).encode(self.django_output_charset)
|
102 |
|
def translation(language):
    """
    Returns a translation object for 'language'.

    This translation object will be constructed out of multiple GNUTranslations
    objects by merging their catalogs, in this order: the global 'locale'
    directory, every directory in settings.LOCALE_PATHS, the project's 'locale'
    directory and the 'locale' directory of every app in
    settings.INSTALLED_APPS. Catalogs merged later override earlier messages.

    If nothing at all is found for 'language', the translation object of
    settings.LANGUAGE_CODE is used as a fallback; if that is missing too, a
    NullTranslations instance is returned. Results are cached in
    _translations.
    """
    cached = _translations.get(language, None)
    if cached is not None:
        return cached

    from django.conf import settings

    # set up the right translation class
    klass = DjangoTranslation
    if sys.version_info < (2, 4):
        klass = DjangoTranslation23

    globalpath = os.path.join(os.path.dirname(sys.modules[settings.__module__].__file__), 'locale')

    if settings.SETTINGS_MODULE is not None:
        parts = settings.SETTINGS_MODULE.split('.')
        project = import_module(parts[0])
        projectpath = os.path.join(os.path.dirname(project.__file__), 'locale')
    else:
        projectpath = None

    def _fetch(lang, fallback=None):
        # Builds (or returns the cached) merged translation object for 'lang'.
        loc = to_locale(lang)

        res = _translations.get(lang, None)
        if res is not None:
            return res

        def _translation(path):
            # Returns the catalog found under 'path', or None if there is none.
            try:
                t = gettext_module.translation('django', path, [loc], klass)
            except IOError:
                return None
            t.set_language(lang)
            return t

        res = _translation(globalpath)

        # We want to ensure that, for example, "en-gb" and "en-us" don't share
        # the same translation object (thus, merging en-us with a local update
        # doesn't affect en-gb), even though they will both use the core "en"
        # translation. So we have to subvert Python's internal gettext caching.
        # 'res' is None when there is no global catalog for this language; in
        # that case there is nothing to copy.
        base_lang = lambda x: x.split('-', 1)[0]
        if res is not None and base_lang(lang) in [base_lang(trans) for trans in _translations]:
            res._info = res._info.copy()
            res._catalog = res._catalog.copy()

        def _merge(path):
            # Merges the catalog under 'path' into 'res'; if there is no
            # 'res' yet, the found catalog becomes the result.
            t = _translation(path)
            if t is not None:
                if res is None:
                    return t
                else:
                    res.merge(t)
            return res

        for localepath in settings.LOCALE_PATHS:
            if os.path.isdir(localepath):
                res = _merge(localepath)

        if projectpath and os.path.isdir(projectpath):
            res = _merge(projectpath)

        for appname in settings.INSTALLED_APPS:
            app = import_module(appname)
            apppath = os.path.join(os.path.dirname(app.__file__), 'locale')

            if os.path.isdir(apppath):
                res = _merge(apppath)

        if res is None:
            if fallback is not None:
                res = fallback
            else:
                # Not cached, so the lookup is repeated on the next call.
                return gettext_module.NullTranslations()
        _translations[lang] = res
        return res

    default_translation = _fetch(settings.LANGUAGE_CODE)
    current_translation = _fetch(language, fallback=default_translation)

    return current_translation
|
198 |
|
def activate(language):
    """
    Fetches the translation object for the given language and installs it as
    the current translation object for the current thread.
    """
    trans = translation(language)
    _active[currentThread()] = trans
|
206 |
|
def deactivate():
    """
    Removes the translation object installed for the current thread, if any,
    so that further _ calls resolve against the default translation object
    again.
    """
    # pop() with a default is a no-op when nothing is installed.
    _active.pop(currentThread(), None)
|
215 |
|
def deactivate_all():
    """
    Installs a NullTranslations() instance as the active translation object
    of the current thread. This is useful when we want delayed translations
    to appear as the original string for some reason.
    """
    null_translation = gettext_module.NullTranslations()
    _active[currentThread()] = null_translation
|
223 |
|
def get_language():
    """
    Returns the language name of the translation object active in the
    current thread, or settings.LANGUAGE_CODE if there is none.
    """
    active = _active.get(currentThread(), None)
    try:
        if active is not None:
            return to_language(active.language())
    except AttributeError:
        # The active object is not a real translation object, e.g. the
        # NullTranslations instance installed by deactivate_all().
        pass
    # If we don't have a real translation object, assume it's the default language.
    from django.conf import settings
    return settings.LANGUAGE_CODE
|
235 |
|
def get_language_bidi():
    """
    Tells whether the selected language uses a BiDi layout, judged by whether
    its base language (the part before the first hyphen) is listed in
    settings.LANGUAGES_BIDI.

    False = left-to-right layout
    True = right-to-left layout
    """
    from django.conf import settings

    language_code = get_language()
    return language_code.split('-')[0] in settings.LANGUAGES_BIDI
|
246 |
|
def catalog():
    """
    Returns the translation object active in the current thread or, if none
    is active, the default one (created on first use from
    settings.LANGUAGE_CODE).

    This can be used if you need to modify the catalog or want to access the
    whole message catalog instead of just translating one string.
    """
    global _default
    active = _active.get(currentThread(), None)
    if active is None:
        if _default is None:
            from django.conf import settings
            _default = translation(settings.LANGUAGE_CODE)
        return _default
    return active
|
261 |
|
def do_translate(message, translation_function):
    """
    Translates 'message' by calling the method named 'translation_function'
    (either gettext or ugettext) on the translation object of the current
    thread. If no translation is activated for the thread, the default
    translation object is used, and created first if necessary.

    If 'message' is a SafeData instance, the result is marked safe as well.
    """
    global _default
    trans = _active.get(currentThread(), None)
    if trans is None:
        if _default is None:
            from django.conf import settings
            _default = translation(settings.LANGUAGE_CODE)
        trans = _default
    translated = getattr(trans, translation_function)(message)
    if isinstance(message, SafeData):
        return mark_safe(translated)
    return translated
|
281 |
|
def gettext(message):
    """
    Translates 'message' through the 'gettext' method of the current
    translation object.
    """
    return do_translate(message, translation_function='gettext')
|
284 |
|
def ugettext(message):
    """
    Translates 'message' through the 'ugettext' method of the current
    translation object.
    """
    return do_translate(message, translation_function='ugettext')
|
287 |
|
def gettext_noop(message):
    """
    Returns 'message' unchanged. Wrapping a string in this function only
    marks it for translation; nothing is translated here. This can be used
    for strings in global variables that should stay in the base language
    (because they might be used externally) and will be translated later.
    """
    return message
|
296 |
|
def do_ntranslate(singular, plural, number, translation_function):
    """
    Translates a singular/plural pair by calling the method named
    'translation_function' on the translation object of the current thread,
    or on the default translation object (created first if necessary) when
    none is active.
    """
    global _default

    trans = _active.get(currentThread(), None)
    if trans is None:
        if _default is None:
            from django.conf import settings
            _default = translation(settings.LANGUAGE_CODE)
        trans = _default
    return getattr(trans, translation_function)(singular, plural, number)
|
307 |
|
def ngettext(singular, plural, number):
    """
    Returns a UTF-8 bytestring of the translation of either the singular or
    the plural form, chosen by 'number'.
    """
    return do_ntranslate(singular, plural, number,
                         translation_function='ngettext')
|
314 |
|
def ungettext(singular, plural, number):
    """
    Returns a unicode string of the translation of either the singular or
    the plural form, chosen by 'number'.
    """
    return do_ntranslate(singular, plural, number,
                         translation_function='ungettext')
|
321 |
|
def check_for_language(lang_code):
    """
    Returns True if the global 'locale' directory holds a 'django' message
    file for the given language code, False otherwise. This is used to
    decide whether a user-provided language (from the cookies or the
    session) is available.
    """
    from django.conf import settings
    settings_file = sys.modules[settings.__module__].__file__
    globalpath = os.path.join(os.path.dirname(settings_file), 'locale')
    mo_file = gettext_module.find('django', globalpath, [to_locale(lang_code)])
    return mo_file is not None
|
335 |
|
def get_language_from_request(request):
    """
    Analyzes the request to find what language the user wants the system to
    show. The session ('django_language'), the language cookie and the
    Accept-Language header are consulted in that order; if none of them
    yields a usable language, settings.LANGUAGE_CODE is returned.

    Only languages listed in settings.LANGUAGES are taken into account. If
    the user requests a sublanguage where we only have the main language, we
    send out the main language.
    """
    from django.conf import settings
    globalpath = os.path.join(os.path.dirname(sys.modules[settings.__module__].__file__), 'locale')
    supported = dict(settings.LANGUAGES)

    if hasattr(request, 'session'):
        lang_code = request.session.get('django_language', None)
        if lang_code is not None and lang_code in supported and check_for_language(lang_code):
            return lang_code

    lang_code = request.COOKIES.get(settings.LANGUAGE_COOKIE_NAME)
    if lang_code and lang_code in supported and check_for_language(lang_code):
        return lang_code

    header = request.META.get('HTTP_ACCEPT_LANGUAGE', '')
    for accept_lang, q_value in parse_accept_lang_header(header):
        if accept_lang == '*':
            # A wildcard ends the search; the default language is used.
            break

        # We have a very restricted form for our language files (no encoding
        # specifier, since they all must be UTF-8 and only one possible
        # language each time). So we avoid the overhead of gettext.find() and
        # work out the MO file manually.

        # 'normalized' is the root name of the locale in POSIX format (which is
        # the format used for the directories holding the MO files).
        normalized = locale.locale_alias.get(to_locale(accept_lang, True))
        if not normalized:
            continue
        # Remove the default encoding from locale_alias.
        normalized = normalized.split('.')[0]

        if normalized in _accepted:
            # We've seen this locale before and have an MO file for it, so no
            # need to check again.
            return _accepted[normalized]

        # Try the full name first, then the main language alone.
        candidates = (
            (accept_lang, normalized),
            (accept_lang.split('-')[0], normalized.split('_')[0]),
        )
        for lang, dirname in candidates:
            if lang.lower() in supported:
                langfile = os.path.join(globalpath, dirname, 'LC_MESSAGES',
                                        'django.mo')
                if os.path.exists(langfile):
                    _accepted[normalized] = lang
                    return lang

    return settings.LANGUAGE_CODE
|
391 |
|
def get_date_formats():
    """
    Returns a (date format, datetime format, time format) tuple.

    Each format is looked up in the translation files under a technical
    message ID. If the lookup returns the ID itself, the corresponding
    format from the settings is used instead.
    """
    from django.conf import settings
    # NOTE(review): the literal ugettext() calls are kept as they are;
    # presumably message extraction relies on them -- confirm before turning
    # this into a loop.
    date_fmt = ugettext('DATE_FORMAT')
    datetime_fmt = ugettext('DATETIME_FORMAT')
    time_fmt = ugettext('TIME_FORMAT')
    if date_fmt == 'DATE_FORMAT':
        date_fmt = settings.DATE_FORMAT
    if datetime_fmt == 'DATETIME_FORMAT':
        datetime_fmt = settings.DATETIME_FORMAT
    if time_fmt == 'TIME_FORMAT':
        time_fmt = settings.TIME_FORMAT
    return date_fmt, datetime_fmt, time_fmt
|
409 |
|
def get_partial_date_formats():
    """
    Returns a (year-month format, month-day format) tuple.

    Each format is looked up in the translation files under a technical
    message ID. If the lookup returns the ID itself, the corresponding
    format from the settings is used instead.
    """
    from django.conf import settings
    # NOTE(review): the literal ugettext() calls are kept as they are;
    # presumably message extraction relies on them -- confirm before turning
    # this into a loop.
    year_month_fmt = ugettext('YEAR_MONTH_FORMAT')
    month_day_fmt = ugettext('MONTH_DAY_FORMAT')
    if year_month_fmt == 'YEAR_MONTH_FORMAT':
        year_month_fmt = settings.YEAR_MONTH_FORMAT
    if month_day_fmt == 'MONTH_DAY_FORMAT':
        month_day_fmt = settings.MONTH_DAY_FORMAT
    return year_month_fmt, month_day_fmt
|
424 |
|
# Matches any single non-whitespace character.
dot_re = re.compile(r'\S')

def blankout(src, char):
    """
    Returns 'src' with every non-whitespace character replaced by 'char';
    whitespace, including line breaks, is left as it is. 'char' is used as
    the replacement argument of a regular expression substitution.
    Used in the templatize function.
    """
    blanked = dot_re.sub(char, src)
    return blanked
|
432 |
|
# Patterns applied by templatize() to the contents of template tokens.
# A "trans" tag followed by a single- or double-quoted string (group 1).
inline_re = re.compile(r"""^\s*trans\s+((?:".*?")|(?:'.*?'))\s*""")
# The opening "blocktrans" tag, optionally followed by arguments.
block_re = re.compile(r"""^\s*blocktrans(?:\s+|$)""")
# The closing "endblocktrans" tag.
endblock_re = re.compile(r"""^\s*endblocktrans$""")
# The "plural" tag that separates the singular from the plural text.
plural_re = re.compile(r"""^\s*plural$""")
# A quoted string constant wrapped in _( ); group 1 is the quoted string.
constant_re = re.compile(r"""_\(((?:".*?")|(?:'.*?'))\)""")
|
438 |
|
def templatize(src):
    """
    Turns a Django template into something that is understood by xgettext. It
    does so by translating the Django translation tags into standard gettext
    function invocations.

    Everything that is not translatable is blanked out with blankout(), so
    that whitespace and line breaks of the template are kept. Raises
    SyntaxError if a translation block contains a block tag other than
    "plural" or "endblocktrans".
    """
    from django.template import Lexer, TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK
    out = StringIO()
    intrans = False     # inside a blocktrans block?
    inplural = False    # past the plural tag of that block?
    singular = []
    plural = []
    for tok in Lexer(src, None).tokenize():
        kind = tok.token_type
        text = tok.contents
        if intrans:
            if kind == TOKEN_BLOCK:
                if endblock_re.match(text):
                    # End of the block: emit the collected message, followed
                    # by the blanked-out original parts.
                    if inplural:
                        out.write(' ngettext(%r,%r,count) ' % (''.join(singular), ''.join(plural)))
                    else:
                        out.write(' gettext(%r) ' % ''.join(singular))
                    for part in singular:
                        out.write(blankout(part, 'S'))
                    if inplural:
                        for part in plural:
                            out.write(blankout(part, 'P'))
                    intrans = False
                    inplural = False
                    singular = []
                    plural = []
                elif plural_re.match(text):
                    inplural = True
                else:
                    raise SyntaxError("Translation blocks must not include other block tags: %s" % text)
            else:
                # Variables become named placeholders, text is kept verbatim;
                # other token types are ignored inside a block.
                if kind == TOKEN_VAR:
                    piece = '%%(%s)s' % text
                elif kind == TOKEN_TEXT:
                    piece = text
                else:
                    continue
                if inplural:
                    plural.append(piece)
                else:
                    singular.append(piece)
        elif kind == TOKEN_BLOCK:
            inline = inline_re.match(text)
            if inline:
                quoted = inline.group(1)
                quote = quoted[0]
                if quote == '"' or quote == "'":
                    quoted = quoted.strip(quote)
                out.write(' gettext(%r) ' % quoted)
            elif block_re.match(text):
                # Constants in the blocktrans arguments are emitted right away.
                for const in constant_re.findall(text):
                    out.write(' _(%s) ' % const)
                intrans = True
                inplural = False
                singular = []
                plural = []
            else:
                consts = constant_re.findall(text)
                if consts:
                    for const in consts:
                        out.write(' _(%s) ' % const)
                else:
                    out.write(blankout(text, 'B'))
        elif kind == TOKEN_VAR:
            # The first part is the variable itself, the rest are filters.
            pieces = text.split('|')
            const = constant_re.match(pieces[0])
            if const:
                out.write(' _(%s) ' % const.group(1))
            for filt in pieces[1:]:
                if ':_(' in filt:
                    out.write(' %s ' % filt.split(':', 1)[1])
                else:
                    out.write(blankout(filt, 'F'))
        else:
            out.write(blankout(text, 'X'))
    return out.getvalue()
|
520 |
|
def parse_accept_lang_header(lang_string):
    """
    Parses the lang_string, which is the body of an HTTP Accept-Language
    header, and returns a list of (lang, q-value) tuples, ordered by
    descending q-value. Entries with equal q-values keep their header order.

    Any format error in lang_string, including a q-value that cannot be
    converted to a float, results in an empty list being returned.
    """
    result = []
    # split() yields groups of (text before the match, lang, q-value)
    # followed by the text after the last match.
    pieces = accept_language_re.split(lang_string)
    if pieces[-1]:
        # Unparsable text at the end of the header.
        return []
    for i in range(0, len(pieces) - 1, 3):
        first, lang, priority = pieces[i : i + 3]
        if first:
            # Unparsable text in front of this entry.
            return []
        if priority:
            try:
                priority = float(priority)
            except ValueError:
                return []
        if not priority:
            # A missing q-value means 1.0.
            # NOTE(review): an explicit q-value of 0 also ends up as 1.0
            # here, exactly as before; confirm whether that is intended.
            priority = 1.0
        result.append((lang, priority))
    # The comparison-function form of sort() is kept for old Python versions.
    result.sort(lambda x, y: -cmp(x[1], y[1]))
    return result