import re

from mako import compat
from mako import lexer
from mako import parsetree


class MessageExtractor(object):
    """Walk a parsed Mako template and yield translatable messages.

    This class reads ``self.config`` and calls ``self.process_python`` but
    defines neither; both are presumably supplied by a concrete subclass
    (TODO confirm against the extractor plugins).  ``self.config`` is
    indexed with the keys ``'encoding'`` and ``'comment-tags'``.
    """

    def process_file(self, fileobj):
        """Lex the template read from *fileobj* and yield its messages.

        The whole of ``fileobj.read()`` is handed to ``lexer.Lexer`` using
        ``self.config['encoding']`` as the input encoding; every item
        produced by :meth:`extract_nodes` for the top-level nodes of the
        parsed template is yielded unchanged.
        """
        template_node = lexer.Lexer(
            fileobj.read(),
            input_encoding=self.config['encoding']).parse()
        for extracted in self.extract_nodes(template_node.get_children()):
            yield extracted

    def extract_nodes(self, nodes):
        """Yield messages found in *nodes*, recursing into child nodes.

        For every node that carries Python code, the code is passed to
        ``self.process_python(code, lineno, translator_strings)`` and each
        item that call produces is yielded.  Translator comments (template
        comments starting with one of the whitespace-separated tags in
        ``self.config['comment-tags']``) are collected and handed over as
        ``translator_strings`` when they directly precede the code.
        """
        # List of (lineno, text) pairs for the pending translator comment.
        translator_comments = []
        in_translator_comments = False
        input_encoding = self.config['encoding'] or 'ascii'
        # A tuple lets one ``startswith`` call test every tag at once, so a
        # comment matching several tags is still only recorded a single
        # time.  An empty tuple never matches.
        comment_tags = tuple(
            filter(None, re.split(r'\s+', self.config['comment-tags'])))

        for node in nodes:
            child_nodes = None
            if in_translator_comments and \
                    isinstance(node, parsetree.Text) and \
                    not node.content.strip():
                # Ignore whitespace within translator comments
                continue

            if isinstance(node, parsetree.Comment):
                value = node.text.strip()
                # Either we are already inside a translator comment (any
                # following comment continues it) or this comment opens one
                # by starting with a configured tag.
                if in_translator_comments or value.startswith(comment_tags):
                    in_translator_comments = True
                    translator_comments.extend(
                        self._split_comment(node.lineno, value))
                continue

            # Pick the Python source attached to the node, and the nested
            # nodes (if any) that must be scanned afterwards.
            if isinstance(node, parsetree.DefTag):
                code = node.function_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.BlockTag):
                code = node.body_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.CallTag):
                code = node.code.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.PageTag):
                code = node.body_decl.code
            elif isinstance(node, parsetree.CallNamespaceTag):
                code = node.expression
                child_nodes = node.nodes
            elif isinstance(node, parsetree.ControlLine):
                if node.isend:
                    in_translator_comments = False
                    continue
                code = node.text
            elif isinstance(node, parsetree.Code):
                in_translator_comments = False
                code = node.code.code
            elif isinstance(node, parsetree.Expression):
                code = node.code.code
            else:
                continue

            # Comments don't apply unless they immediately precede the
            # message
            if translator_comments and \
                    translator_comments[-1][0] < node.lineno - 1:
                translator_comments = []

            translator_strings = [
                comment[1] for comment in translator_comments]

            if isinstance(code, compat.text_type):
                code = code.encode(input_encoding, 'backslashreplace')

            used_translator_comments = False
            # We add extra newline to work around a pybabel bug
            # (see python-babel/babel#274, parse_encoding dies if the first
            # input string of the input is non-ascii)
            # Also, because we added it, we have to subtract one from
            # node.lineno
            code = compat.byte_buffer(compat.b('\n') + code)

            for message in self.process_python(
                    code, node.lineno - 1, translator_strings):
                yield message
                used_translator_comments = True

            # Pending comments are only dropped once a message consumed
            # them; otherwise they stay available for the next node.
            if used_translator_comments:
                translator_comments = []
            in_translator_comments = False

            if child_nodes:
                for extracted in self.extract_nodes(child_nodes):
                    yield extracted

    @staticmethod
    def _split_comment(lineno, comment):
        """Return the multiline comment at lineno split into a list of
        comment line numbers and the accompanying comment line"""
        return [(lineno + index, line) for index, line in
                enumerate(comment.splitlines())]