bazarr/libs/mako/ext/extract.py

# ext/extract.py
# Copyright 2006-2024 the Mako authors and contributors <see AUTHORS file>
#
# This module is part of Mako and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php

from io import BytesIO
from io import StringIO
import re

from mako import lexer
from mako import parsetree


class MessageExtractor:
    use_bytes = True

    def process_file(self, fileobj):
        template_node = lexer.Lexer(
            fileobj.read(), input_encoding=self.config["encoding"]
        ).parse()
        yield from self.extract_nodes(template_node.get_children())

    def extract_nodes(self, nodes):
        translator_comments = []
        in_translator_comments = False
        input_encoding = self.config["encoding"] or "ascii"
        comment_tags = list(
            filter(None, re.split(r"\s+", self.config["comment-tags"]))
        )

        for node in nodes:
            child_nodes = None
            if (
                in_translator_comments
                and isinstance(node, parsetree.Text)
                and not node.content.strip()
            ):
                # Ignore whitespace within translator comments
                continue

            if isinstance(node, parsetree.Comment):
                value = node.text.strip()
                if in_translator_comments:
                    translator_comments.extend(
                        self._split_comment(node.lineno, value)
                    )
                    continue
                for comment_tag in comment_tags:
                    if value.startswith(comment_tag):
                        in_translator_comments = True
                        translator_comments.extend(
                            self._split_comment(node.lineno, value)
                        )
                continue

            if isinstance(node, parsetree.DefTag):
                code = node.function_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.BlockTag):
                code = node.body_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.CallTag):
                code = node.code.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.PageTag):
                code = node.body_decl.code
            elif isinstance(node, parsetree.CallNamespaceTag):
                code = node.expression
                child_nodes = node.nodes
            elif isinstance(node, parsetree.ControlLine):
                if node.isend:
                    in_translator_comments = False
                    continue
                code = node.text
            elif isinstance(node, parsetree.Code):
                in_translator_comments = False
                code = node.code.code
            elif isinstance(node, parsetree.Expression):
                code = node.code.code
            else:
                continue

            # Comments don't apply unless they immediately precede the message
            if (
                translator_comments
                and translator_comments[-1][0] < node.lineno - 1
            ):
                translator_comments = []

            translator_strings = [
                comment[1] for comment in translator_comments
            ]

            if isinstance(code, str) and self.use_bytes:
                code = code.encode(input_encoding, "backslashreplace")

            used_translator_comments = False
            # We add extra newline to work around a pybabel bug
            # (see python-babel/babel#274, parse_encoding dies if the first
            # input string of the input is non-ascii)
            # Also, because we added it, we have to subtract one from
            # node.lineno
            if self.use_bytes:
                code = BytesIO(b"\n" + code)
            else:
                code = StringIO("\n" + code)

            for message in self.process_python(
                code, node.lineno - 1, translator_strings
            ):
                yield message
                used_translator_comments = True

            if used_translator_comments:
                translator_comments = []
            in_translator_comments = False

            if child_nodes:
                yield from self.extract_nodes(child_nodes)

    @staticmethod
    def _split_comment(lineno, comment):
        """Return the multiline comment at lineno split into a list of
        comment line numbers and the accompanying comment line"""
        return [
            (lineno + index, line)
            for index, line in enumerate(comment.splitlines())
        ]