mylar/lib/mako/ext/extract.py

109 lines
4.2 KiB
Python

import re
from mako import compat
from mako import lexer
from mako import parsetree
class MessageExtractor(object):
def process_file(self, fileobj):
template_node = lexer.Lexer(
fileobj.read(),
input_encoding=self.config['encoding']).parse()
for extracted in self.extract_nodes(template_node.get_children()):
yield extracted
def extract_nodes(self, nodes):
translator_comments = []
in_translator_comments = False
input_encoding = self.config['encoding'] or 'ascii'
comment_tags = list(
filter(None, re.split(r'\s+', self.config['comment-tags'])))
for node in nodes:
child_nodes = None
if in_translator_comments and \
isinstance(node, parsetree.Text) and \
not node.content.strip():
# Ignore whitespace within translator comments
continue
if isinstance(node, parsetree.Comment):
value = node.text.strip()
if in_translator_comments:
translator_comments.extend(
self._split_comment(node.lineno, value))
continue
for comment_tag in comment_tags:
if value.startswith(comment_tag):
in_translator_comments = True
translator_comments.extend(
self._split_comment(node.lineno, value))
continue
if isinstance(node, parsetree.DefTag):
code = node.function_decl.code
child_nodes = node.nodes
elif isinstance(node, parsetree.BlockTag):
code = node.body_decl.code
child_nodes = node.nodes
elif isinstance(node, parsetree.CallTag):
code = node.code.code
child_nodes = node.nodes
elif isinstance(node, parsetree.PageTag):
code = node.body_decl.code
elif isinstance(node, parsetree.CallNamespaceTag):
code = node.expression
child_nodes = node.nodes
elif isinstance(node, parsetree.ControlLine):
if node.isend:
in_translator_comments = False
continue
code = node.text
elif isinstance(node, parsetree.Code):
in_translator_comments = False
code = node.code.code
elif isinstance(node, parsetree.Expression):
code = node.code.code
else:
continue
# Comments don't apply unless they immediately preceed the message
if translator_comments and \
translator_comments[-1][0] < node.lineno - 1:
translator_comments = []
translator_strings = [
comment[1] for comment in translator_comments]
if isinstance(code, compat.text_type):
code = code.encode(input_encoding, 'backslashreplace')
used_translator_comments = False
# We add extra newline to work around a pybabel bug
# (see python-babel/babel#274, parse_encoding dies if the first
# input string of the input is non-ascii)
# Also, because we added it, we have to subtract one from
# node.lineno
code = compat.byte_buffer(compat.b('\n') + code)
for message in self.process_python(
code, node.lineno - 1, translator_strings):
yield message
used_translator_comments = True
if used_translator_comments:
translator_comments = []
in_translator_comments = False
if child_nodes:
for extracted in self.extract_nodes(child_nodes):
yield extracted
@staticmethod
def _split_comment(lineno, comment):
"""Return the multiline comment at lineno split into a list of
comment line numbers and the accompanying comment line"""
return [(lineno + index, line) for index, line in
enumerate(comment.splitlines())]