# mylar/lib/mako/ext/extract.py

import re
from mako import compat
from mako import lexer
from mako import parsetree


class MessageExtractor(object):
    """Base logic for pulling translatable messages out of a Mako template.

    This class reads two things it does not define itself:

    * ``self.config`` -- a mapping with an ``'encoding'`` entry (may be
      falsy, in which case ``'ascii'`` is used when encoding code) and a
      ``'comment-tags'`` entry (a whitespace-separated string of tags).
    * ``self.process_python(code, code_lineno, translator_strings)`` -- an
      iterable of extracted messages for one chunk of Python code.

    NOTE(review): both are presumably supplied by a subclass -- confirm.
    """

    def process_file(self, fileobj):
        """Parse the template read from ``fileobj`` and yield its messages.

        The whole content of ``fileobj`` is read, lexed with the configured
        input encoding, and the children of the resulting template node are
        handed to :meth:`extract_nodes`.
        """
        template_node = lexer.Lexer(
            fileobj.read(),
            input_encoding=self.config['encoding']).parse()
        for extracted in self.extract_nodes(template_node.get_children()):
            yield extracted

    def extract_nodes(self, nodes):
        """Yield the messages found in ``nodes`` and, recursively, in the
        child nodes of any tag that carries a body.

        Translator comments (template comments starting with one of the
        configured comment tags, plus the comments directly following
        them) are collected and passed to ``self.process_python`` together
        with the code of the next node, provided they sit immediately
        before that node.
        """
        # Pending (lineno, text) pairs, one per translator comment line.
        translator_comments = []
        in_translator_comments = False
        input_encoding = self.config['encoding'] or 'ascii'
        comment_tags = list(
            filter(None, re.split(r'\s+', self.config['comment-tags'])))

        for node in nodes:
            child_nodes = None
            if in_translator_comments and \
                    isinstance(node, parsetree.Text) and \
                    not node.content.strip():
                # Ignore whitespace within translator comments
                continue

            if isinstance(node, parsetree.Comment):
                value = node.text.strip()
                if in_translator_comments:
                    # Continuation of an already started translator comment
                    translator_comments.extend(
                        self._split_comment(node.lineno, value))
                    continue
                for comment_tag in comment_tags:
                    if value.startswith(comment_tag):
                        in_translator_comments = True
                        translator_comments.extend(
                            self._split_comment(node.lineno, value))
                        # Stop at the first matching tag: when several
                        # tags share a prefix (e.g. "NOTE" and "NOTE:")
                        # the comment must only be recorded once.
                        break
                continue

            # Pick the Python source carried by the node (and its body
            # nodes, if any); node types without code are skipped.
            if isinstance(node, parsetree.DefTag):
                code = node.function_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.BlockTag):
                code = node.body_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.CallTag):
                code = node.code.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.PageTag):
                code = node.body_decl.code
            elif isinstance(node, parsetree.CallNamespaceTag):
                code = node.expression
                child_nodes = node.nodes
            elif isinstance(node, parsetree.ControlLine):
                if node.isend:
                    in_translator_comments = False
                    continue
                code = node.text
            elif isinstance(node, parsetree.Code):
                in_translator_comments = False
                code = node.code.code
            elif isinstance(node, parsetree.Expression):
                code = node.code.code
            else:
                continue

            # Comments don't apply unless they immediately precede the message
            if translator_comments and \
                    translator_comments[-1][0] < node.lineno - 1:
                translator_comments = []

            translator_strings = [
                comment[1] for comment in translator_comments]

            if isinstance(code, compat.text_type):
                code = code.encode(input_encoding, 'backslashreplace')

            used_translator_comments = False
            # We add extra newline to work around a pybabel bug
            # (see python-babel/babel#274, parse_encoding dies if the first
            # input string of the input is non-ascii)
            # Also, because we added it, we have to subtract one from
            # node.lineno
            code = compat.byte_buffer(compat.b('\n') + code)

            for message in self.process_python(
                    code, node.lineno - 1, translator_strings):
                yield message
                used_translator_comments = True

            # Comments are consumed only if the node actually produced a
            # message; otherwise they stay pending for the next node.
            if used_translator_comments:
                translator_comments = []
            in_translator_comments = False

            if child_nodes:
                for extracted in self.extract_nodes(child_nodes):
                    yield extracted

    @staticmethod
    def _split_comment(lineno, comment):
        """Return the multiline comment at lineno split into a list of
        comment line numbers and the accompanying comment line

        Each element is a ``(line_number, line_text)`` tuple; the first
        line is numbered ``lineno`` and the following ones consecutively.
        An empty comment yields an empty list.
        """
        return [(lineno + index, line) for index, line in
                enumerate(comment.splitlines())]