mirror of https://github.com/morpheus65535/bazarr
130 lines
4.5 KiB
Python
130 lines
4.5 KiB
Python
# ext/extract.py
# Copyright 2006-2022 the Mako authors and contributors <see AUTHORS file>
#
# This module is part of Mako and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
|
|
|
|
from io import BytesIO
|
|
from io import StringIO
|
|
import re
|
|
|
|
from mako import lexer
|
|
from mako import parsetree
|
|
|
|
|
|
class MessageExtractor:
    """Extract translatable messages from the Python code of a Mako template.

    The methods below rely on two members that this class does not define
    itself and that must be supplied elsewhere (e.g. by a subclass):

    * ``self.config`` -- a mapping read for the ``"encoding"`` and
      ``"comment-tags"`` keys.
    * ``self.process_python(code, lineno, translator_strings)`` -- returns
      an iterable of the messages found in the file-like object ``code``.
    """

    # When true, code snippets are handed to ``process_python`` as bytes
    # (wrapped in ``BytesIO``); otherwise as text (wrapped in ``StringIO``).
    use_bytes = True

    def process_file(self, fileobj):
        """Parse the template read from ``fileobj`` and yield its messages.

        The whole of ``fileobj`` is read and lexed using
        ``self.config["encoding"]`` as the input encoding; the top-level
        nodes of the resulting tree are then passed to
        :meth:`extract_nodes`.
        """
        template_node = lexer.Lexer(
            fileobj.read(), input_encoding=self.config["encoding"]
        ).parse()
        yield from self.extract_nodes(template_node.get_children())

    def extract_nodes(self, nodes):
        """Yield the messages found in ``nodes`` and, recursively, in the
        child nodes of any tag that has them.

        Comment nodes whose text starts with one of the whitespace-separated
        tags in ``self.config["comment-tags"]`` begin a run of translator
        comments.  The run is passed to ``self.process_python`` together
        with the next piece of Python code, provided the last comment line
        is at most one line above that code.
        """
        translator_comments = []
        in_translator_comments = False
        input_encoding = self.config["encoding"] or "ascii"
        # A tuple lets ``str.startswith`` test every tag in one call.  The
        # previous per-tag loop added the same comment once for *each*
        # matching tag (e.g. both "NOTE" and "NOTE:"), duplicating it.
        # An empty tuple never matches, so "no tags" still collects nothing.
        comment_tags = tuple(
            filter(None, re.split(r"\s+", self.config["comment-tags"]))
        )

        for node in nodes:
            child_nodes = None
            if (
                in_translator_comments
                and isinstance(node, parsetree.Text)
                and not node.content.strip()
            ):
                # Ignore whitespace within translator comments
                continue

            if isinstance(node, parsetree.Comment):
                value = node.text.strip()
                if in_translator_comments:
                    # Already inside a run: every further comment belongs
                    # to it, tagged or not.
                    translator_comments.extend(
                        self._split_comment(node.lineno, value)
                    )
                elif value.startswith(comment_tags):
                    in_translator_comments = True
                    translator_comments.extend(
                        self._split_comment(node.lineno, value)
                    )
                # Comment nodes never carry code to scan.
                continue

            # Pick the Python source attached to the node and, for tags
            # that have a body, the child nodes to recurse into afterwards.
            if isinstance(node, parsetree.DefTag):
                code = node.function_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.BlockTag):
                code = node.body_decl.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.CallTag):
                code = node.code.code
                child_nodes = node.nodes
            elif isinstance(node, parsetree.PageTag):
                code = node.body_decl.code
            elif isinstance(node, parsetree.CallNamespaceTag):
                code = node.expression
                child_nodes = node.nodes
            elif isinstance(node, parsetree.ControlLine):
                if node.isend:
                    # An end line stops comment collection and is not
                    # scanned for messages.
                    in_translator_comments = False
                    continue
                code = node.text
            elif isinstance(node, parsetree.Code):
                in_translator_comments = False
                code = node.code.code
            elif isinstance(node, parsetree.Expression):
                code = node.code.code
            else:
                continue

            # Comments don't apply unless they immediately precede the message
            if (
                translator_comments
                and translator_comments[-1][0] < node.lineno - 1
            ):
                translator_comments = []

            translator_strings = [
                comment[1] for comment in translator_comments
            ]

            if isinstance(code, str) and self.use_bytes:
                code = code.encode(input_encoding, "backslashreplace")

            used_translator_comments = False
            # We add extra newline to work around a pybabel bug
            # (see python-babel/babel#274, parse_encoding dies if the first
            # input string of the input is non-ascii)
            # Also, because we added it, we have to subtract one from
            # node.lineno
            if self.use_bytes:
                code = BytesIO(b"\n" + code)
            else:
                code = StringIO("\n" + code)

            for message in self.process_python(
                code, node.lineno - 1, translator_strings
            ):
                yield message
                used_translator_comments = True

            if used_translator_comments:
                # The comments were consumed by at least one message; do
                # not attach them to later code as well.
                translator_comments = []
            in_translator_comments = False

            if child_nodes:
                yield from self.extract_nodes(child_nodes)

    @staticmethod
    def _split_comment(lineno, comment):
        """Return the multiline comment at lineno split into a list of
        comment line numbers and the accompanying comment line"""
        return [
            (lineno + index, line)
            for index, line in enumerate(comment.splitlines())
        ]
|