mirror of https://github.com/morpheus65535/bazarr
381 lines
16 KiB
Python
381 lines
16 KiB
Python
import logging
|
|
import re
|
|
import warnings
|
|
from numbers import Number
|
|
from typing import Any, Union, Optional, Dict
|
|
|
|
import pysubs2
|
|
from .formatbase import FormatBase
|
|
from .ssaevent import SSAEvent
|
|
from .ssastyle import SSAStyle
|
|
from .common import Color, Alignment, SSA_ALIGNMENT
|
|
from .time import make_time, ms_to_times, timestamp_to_ms, TIMESTAMP, TIMESTAMP_SHORT
|
|
|
|
|
|
def ass_to_ssa_alignment(i):
    """
    Deprecated: look up the ``SSA_ALIGNMENT`` entry for 1-based index ``i``.

    Emits a ``DeprecationWarning``; new code should use the ``Alignment`` enum.
    """
    # stacklevel=2 attributes the warning to the caller of this function,
    # which is the code that actually needs to be migrated.
    warnings.warn(
        "ass_to_ssa_alignment function is deprecated, please use the Alignment enum",
        DeprecationWarning,
        stacklevel=2,
    )
    return SSA_ALIGNMENT[i-1]
|
|
|
|
def ssa_to_ass_alignment(i):
    """
    Deprecated: return the 1-based position of ``i`` within ``SSA_ALIGNMENT``.

    Emits a ``DeprecationWarning``; new code should use the ``Alignment`` enum.
    """
    # stacklevel=2 attributes the warning to the caller of this function,
    # which is the code that actually needs to be migrated.
    warnings.warn(
        "ssa_to_ass_alignment function is deprecated, please use the Alignment enum",
        DeprecationWarning,
        stacklevel=2,
    )
    return SSA_ALIGNMENT.index(i) + 1
|
|
|
|
# Matches a line that opens a new section, e.g. "[Script Info]".
SECTION_HEADING = re.compile(
    r"^.{,3}"  # allow 3 chars at start of line for BOM
    r"\["  # open square bracket
    r"[^]]*[a-z][^]]*"  # inside square brackets, at least one lowercase letter (this guards vs. uuencoded font data)
    r"]"  # close square bracket
)

# Matches the "fontname: <name>" / "filename: <name>" line that starts an
# attachment; the name is exposed as the "name" group.
ATTACHMENT_FILE_HEADING = re.compile(r"(fontname|filename):\s+(?P<name>\S+)")

# "Format:" line written at the top of the styles section, keyed by format.
STYLE_FORMAT_LINE = {
    "ass": "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic,"
           " Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment,"
           " MarginL, MarginR, MarginV, Encoding",
    "ssa": "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, TertiaryColour, BackColour, Bold, Italic,"
           " BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, AlphaLevel, Encoding"
}

# Style attribute names in the order the values appear on a "Style:" line,
# keyed by format. The leading style name is handled separately and is
# therefore not listed here.
STYLE_FIELDS = {
    "ass": ["fontname", "fontsize", "primarycolor", "secondarycolor", "outlinecolor", "backcolor", "bold", "italic",
            "underline", "strikeout", "scalex", "scaley", "spacing", "angle", "borderstyle", "outline", "shadow",
            "alignment", "marginl", "marginr", "marginv", "encoding"],
    "ssa": ["fontname", "fontsize", "primarycolor", "secondarycolor", "tertiarycolor", "backcolor", "bold", "italic",
            "borderstyle", "outline", "shadow", "alignment", "marginl", "marginr", "marginv", "alphalevel", "encoding"]
}

# "Format:" line written at the top of the [Events] section, keyed by format.
EVENT_FORMAT_LINE = {
    "ass": "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
    "ssa": "Format: Marked, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text"
}

# Event attribute names in the order the values appear on a
# "Dialogue:" / "Comment:" line, keyed by format.
EVENT_FIELDS = {
    "ass": ["layer", "start", "end", "style", "name", "marginl", "marginr", "marginv", "effect", "text"],
    "ssa": ["marked", "start", "end", "style", "name", "marginl", "marginr", "marginv", "effect", "text"]
}

#: Largest timestamp allowed in SubStation, ie. 9:59:59.99.
# NOTE(review): presumably make_time() returns milliseconds, so this is
# 10 hours minus one centisecond -- confirm against .time.make_time.
MAX_REPRESENTABLE_TIME = make_time(h=10) - 10
|
|
|
|
def color_to_ass_rgba(c: Color) -> str:
    """Format a color as ``&H`` followed by eight uppercase hex digits (alpha, blue, green, red)."""
    packed = c.r | (c.g << 8) | (c.b << 16) | (c.a << 24)
    return "&H{:08X}".format(packed)
|
|
|
|
def color_to_ssa_rgb(c: Color) -> str:
    """Format a color as a decimal integer string packing blue, green, red; alpha is not included."""
    packed = c.r | (c.g << 8) | (c.b << 16)
    return str(packed)
|
|
|
|
def rgba_to_color(s: str) -> Color:
    """
    Parse a SubStation color string into a Color.

    A string starting with ``&`` is read as hexadecimal after skipping its
    first two characters; anything else is read as a decimal integer.
    The four lowest bytes of the number become red, green, blue and alpha,
    from least to most significant.
    """
    packed = int(s[2:], base=16) if s[0] == '&' else int(s)
    # Channels are stored least-significant byte first: r, g, b, a.
    channels = [(packed >> shift) & 0xff for shift in (0, 8, 16, 24)]
    return Color(*channels)
|
|
|
|
def is_valid_field_content(s: str) -> bool:
    """
    Tell whether string s is safe to store in a SubStation field.

    Fields are serialized in a CSV-like way, so a value containing
    a comma or a newline cannot be represented.

    """
    forbidden = ("\n", ",")
    return not any(ch in s for ch in forbidden)
|
|
|
|
|
|
def parse_tags(text: str, style: SSAStyle = SSAStyle.DEFAULT_STYLE, styles: Optional[Dict[str, SSAStyle]] = None):
    """
    Break text into fragments, each paired with its effective SSAStyle.

    The text is split on ``SSAEvent.OVERRIDE_SEQUENCE``. The result is a list
    of ``(fragment, style)`` tuples where ``style`` is the given line style
    with every override sequence preceding the fragment applied to it.

    When the text contains no override sequence, a single tuple holding the
    whole text and the ``style`` object itself (not a copy) is returned.

    Recognized override tags:

    - i, b, u, s followed by one digit (enabled only when the digit is 1)
    - p followed by one digit (``drawing`` is set when the digit is non-zero)
    - r, bare (reset to ``style``) or with a style name looked up in
      ``styles``; an unknown name leaves the current style untouched

    """
    known_styles = {} if styles is None else styles

    fragments = SSAEvent.OVERRIDE_SEQUENCE.split(text)
    if len(fragments) == 1:
        return [(text, style)]

    tag_pattern = re.compile(r"\\[ibusp][0-9]|\\r[a-zA-Z_0-9 ]*")
    # Single-letter on/off tags and the style attribute each one controls.
    toggles = {"i": "italic", "b": "bold", "u": "underline", "s": "strikeout"}

    def apply_overrides(all_overrides: str) -> SSAStyle:
        # Replay every tag found in the concatenated override text on a copy
        # of the line style.
        current = style.copy()
        for tag in tag_pattern.findall(all_overrides):
            # tag is a backslash, one letter, then the argument (may be empty for \r)
            letter, argument = tag[1], tag[2:]
            if letter == "r":
                if not argument:
                    current = style.copy()  # reset to original line style
                elif argument in known_styles:
                    current = known_styles[argument].copy()  # reset to named style
                continue
            if letter == "p":
                try:
                    scale = int(argument)
                except (ValueError, IndexError):
                    continue
                current.drawing = scale > 0
                continue
            setattr(current, toggles[letter], argument == "1")
        return current

    overrides = SSAEvent.OVERRIDE_SEQUENCE.findall(text)

    # prefixes[i] is the concatenation of the first i override sequences,
    # i.e. everything that precedes fragment i.
    prefixes = [""]
    for sequence in overrides:
        prefixes.append(prefixes[-1] + sequence)

    return [(fragment, apply_overrides(prefix)) for fragment, prefix in zip(fragments, prefixes)]
|
|
|
|
|
|
# Default value of the header_notice argument of SubstationFormat.to_file();
# each of its lines is written as a ";" comment under [Script Info].
NOTICE = "Script generated by pysubs2\nhttps://pypi.python.org/pypi/pysubs2"
|
|
|
|
class SubstationFormat(FormatBase):
    """SubStation Alpha (ASS, SSA) subtitle format implementation"""

    @staticmethod
    def ms_to_timestamp(ms: int) -> str:
        """
        Convert ms to 'H:MM:SS.cc'

        Negative input is clamped to zero. Input above
        ``MAX_REPRESENTABLE_TIME`` is clamped to that value and a
        ``RuntimeWarning`` is emitted. The value is rounded to the nearest
        centisecond before formatting.
        """
        if ms < 0:
            ms = 0
        if ms > MAX_REPRESENTABLE_TIME:
            warnings.warn("Overflow in SubStation timestamp, clamping to MAX_REPRESENTABLE_TIME", RuntimeWarning)
            ms = MAX_REPRESENTABLE_TIME

        # Aegisub does rounding, see https://github.com/Aegisub/Aegisub/blob/6f546951b4f004da16ce19ba638bf3eedefb9f31/libaegisub/include/libaegisub/ass/time.h#L32
        # Round the whole value *before* splitting it into h/m/s, so that a
        # carry (e.g. 995 ms -> 1.00 s) propagates into the seconds instead of
        # producing a three-digit centisecond field such as "0:00:00.100".
        rounded_ms = (ms + 5) - (ms + 5) % 10

        h, m, s, ms = ms_to_times(rounded_ms)
        cs = ms // 10

        return f"{h:01d}:{m:02d}:{s:02d}.{cs:02d}"

    @classmethod
    def guess_format(cls, text):
        """
        See :meth:`pysubs2.formats.FormatBase.guess_format()`

        Returns ``"ass"`` if the text mentions "V4+ Styles", ``"ssa"`` if it
        mentions "V4 Styles" (both case-insensitive), otherwise ``None``.
        """
        if re.search(r"V4\+ Styles", text, re.IGNORECASE):
            return "ass"
        elif re.search(r"V4 Styles", text, re.IGNORECASE):
            return "ssa"
        return None

    @classmethod
    def from_file(cls, subs: "pysubs2.SSAFile", fp, format_, **kwargs):
        """
        See :meth:`pysubs2.formats.FormatBase.from_file()`

        Clears ``subs.info``, ``subs.aegisub_project``, ``subs.styles``,
        ``subs.fonts_opaque`` and ``subs.graphics_opaque`` and fills them from
        the lines of ``fp``; parsed events are appended to ``subs.events``.

        Raises ``ValueError`` when a Start/End timestamp cannot be parsed.
        """

        def string_to_field(f: str, v: str):
            """Convert raw field text ``v`` into the value stored for field ``f``."""
            # Per issue #45, we should handle the case where there is extra whitespace around the values.
            # Extra whitespace is removed in non-string fields where it would break the parser otherwise,
            # and in font name (where it doesn't really make sense). It is preserved in Dialogue string
            # fields like Text, Name and Effect (to avoid introducing unnecessary change to parser output).

            if f in {"start", "end"}:
                v = v.strip()
                if v.startswith("-"):
                    # handle negative timestamps
                    v = v[1:]
                    sign = -1
                else:
                    sign = 1

                m = TIMESTAMP.match(v)
                if m is None:
                    m = TIMESTAMP_SHORT.match(v)
                    if m is None:
                        raise ValueError(f"Failed to parse timestamp: {v!r}")

                return sign * timestamp_to_ms(m.groups())
            elif "color" in f:
                v = v.strip()
                return rgba_to_color(v)
            elif f in {"bold", "underline", "italic", "strikeout"}:
                # strip so that e.g. " -1" is not silently read as False
                return v.strip() == "-1"
            elif f in {"borderstyle", "encoding", "marginl", "marginr", "marginv", "layer", "alphalevel"}:
                return int(v)
            elif f in {"fontsize", "scalex", "scaley", "spacing", "angle", "outline", "shadow"}:
                return float(v)
            elif f == "marked":
                # strip so that trailing whitespace does not hide the digit
                return v.strip().endswith("1")
            elif f == "alignment":
                try:
                    if format_ == "ass":
                        return Alignment(int(v))
                    else:
                        return Alignment.from_ssa_alignment(int(v))
                except Exception:
                    # deliberate best-effort: a bad alignment must not abort the whole parse
                    warnings.warn("Failed to parse alignment, using default", RuntimeWarning)
                    return Alignment.BOTTOM_CENTER
            elif f == "fontname":
                return v.strip()
            else:
                return v

        subs.info.clear()
        subs.aegisub_project.clear()
        subs.styles.clear()
        subs.fonts_opaque.clear()
        subs.graphics_opaque.clear()

        inside_info_section = False
        inside_aegisub_section = False
        inside_font_section = False
        inside_graphic_section = False
        current_attachment_name = None
        current_attachment_lines_buffer = []
        # Recorded when an attachment starts, so that it is filed under the
        # section it was declared in, whatever section is active at flush time.
        current_attachment_is_font = None

        def flush_attachment(lineno=None):
            """Store the pending attachment, if any, and reset the buffer (lineno=None means EOF)."""
            nonlocal current_attachment_name
            if not current_attachment_name:
                return

            attachment_data = current_attachment_lines_buffer[:]
            if current_attachment_is_font:
                subs.fonts_opaque[current_attachment_name] = attachment_data
            else:
                subs.graphics_opaque[current_attachment_name] = attachment_data

            if lineno is None:
                logging.debug("at EOF: finished attachment definition %s", current_attachment_name)
            else:
                logging.debug("at line %d: finished attachment definition %s", lineno, current_attachment_name)
            current_attachment_lines_buffer.clear()
            current_attachment_name = None

        for lineno, line in enumerate(fp, 1):
            line = line.strip()

            if SECTION_HEADING.match(line):
                logging.debug("at line %d: section heading %s", lineno, line)
                # A new section ends any attachment still being collected
                # (attachment followed by a heading without a blank line).
                flush_attachment(lineno)
                inside_info_section = "Info" in line
                inside_aegisub_section = "Aegisub" in line
                inside_font_section = "Fonts" in line
                inside_graphic_section = "Graphics" in line
            elif inside_info_section or inside_aegisub_section:
                if line.startswith(";"):
                    continue  # skip comments
                k, colon, v = line.partition(":")
                if not colon:
                    continue  # not a "key: value" line, ignore it
                if inside_info_section:
                    subs.info[k] = v.strip()
                else:
                    subs.aegisub_project[k] = v.strip()
            elif inside_font_section or inside_graphic_section:
                m = ATTACHMENT_FILE_HEADING.match(line)

                if m or not line:
                    # flush last font/picture on newline or new font/picture name
                    flush_attachment(lineno)

                if m:
                    # start new font/picture
                    current_attachment_name = m.group("name")
                    current_attachment_is_font = inside_font_section
                elif line:
                    # add non-empty line to current buffer
                    current_attachment_lines_buffer.append(line)
            elif line.startswith("Style:"):
                _, rest = line.split(":", 1)
                name, *raw_fields = rest.strip().split(",")
                field_dict = {f: string_to_field(f, v) for f, v in zip(STYLE_FIELDS[format_], raw_fields)}
                sty = SSAStyle(**field_dict)
                subs.styles[name] = sty
            elif line.startswith(("Dialogue:", "Comment:")):
                ev_type, rest = line.split(":", 1)
                # limit the split so that commas inside the last field (Text) are preserved
                raw_fields = rest.strip().split(",", len(EVENT_FIELDS[format_])-1)
                field_dict = {f: string_to_field(f, v) for f, v in zip(EVENT_FIELDS[format_], raw_fields)}
                field_dict["type"] = ev_type
                ev = SSAEvent(**field_dict)
                subs.events.append(ev)

        # cleanup fonts/pictures: flush last attachment on EOF
        flush_attachment()

    @classmethod
    def to_file(cls, subs: "pysubs2.SSAFile", fp, format_, header_notice=NOTICE, **kwargs):
        """
        See :meth:`pysubs2.formats.FormatBase.to_file()`

        Writes the script info, optional Aegisub project data, styles,
        optional fonts and graphics, and events to ``fp``. Each line of
        ``header_notice`` is written as a ";" comment under [Script Info].

        Note that ``subs.info["ScriptType"]`` is overwritten to match
        ``format_``.

        Raises ``TypeError`` when a style or event field holds a value of
        an unsupported type.
        """
        print("[Script Info]", file=fp)
        for line in header_notice.splitlines(False):
            print(";", line, file=fp)

        subs.info["ScriptType"] = "v4.00+" if format_ == "ass" else "v4.00"
        for k, v in subs.info.items():
            print(k, v, sep=": ", file=fp)

        if subs.aegisub_project:
            print("\n[Aegisub Project Garbage]", file=fp)
            for k, v in subs.aegisub_project.items():
                print(k, v, sep=": ", file=fp)

        def field_to_string(f: str, v: Any, line: Union[SSAEvent, SSAStyle]):
            """Serialize value ``v`` of field ``f``; ``line`` is only used in the error message."""
            if f in {"start", "end"}:
                return cls.ms_to_timestamp(v)
            elif f == "marked":
                return f"Marked={v:d}"
            elif f == "alignment":
                if isinstance(v, Alignment):
                    alignment = v
                else:
                    warnings.warn("The 'alignment' attribute of SSAStyle should be an Alignment instance, using plain int is deprecated", DeprecationWarning)
                    alignment = Alignment(v)

                if format_ == "ssa":
                    return str(alignment.to_ssa_alignment())
                else:
                    return str(alignment.value)
            elif isinstance(v, bool):
                # must be tested before Number, since bool is a Number too
                return "-1" if v else "0"
            elif isinstance(v, (str, Number)):
                return str(v)
            elif isinstance(v, Color):
                if format_ == "ass":
                    return color_to_ass_rgba(v)
                else:
                    return color_to_ssa_rgb(v)
            else:
                raise TypeError(f"Unexpected type when writing a SubStation field {f!r} for line {line!r}")

        print("\n[V4+ Styles]" if format_ == "ass" else "\n[V4 Styles]", file=fp)
        print(STYLE_FORMAT_LINE[format_], file=fp)
        for name, sty in subs.styles.items():
            fields = [field_to_string(f, getattr(sty, f), sty) for f in STYLE_FIELDS[format_]]
            print(f"Style: {name}", *fields, sep=",", file=fp)

        if subs.fonts_opaque:
            print("\n[Fonts]", file=fp)
            for font_name, font_lines in sorted(subs.fonts_opaque.items()):
                print(f"fontname: {font_name}", file=fp)
                for line in font_lines:
                    print(line, file=fp)
                print(file=fp)

        if subs.graphics_opaque:
            print("\n[Graphics]", file=fp)
            for picture_name, picture_lines in sorted(subs.graphics_opaque.items()):
                print(f"filename: {picture_name}", file=fp)
                for line in picture_lines:
                    print(line, file=fp)
                print(file=fp)

        print("\n[Events]", file=fp)
        print(EVENT_FORMAT_LINE[format_], file=fp)
        for ev in subs.events:
            fields = [field_to_string(f, getattr(ev, f), ev) for f in EVENT_FIELDS[format_]]
            print(ev.type, end=": ", file=fp)
            print(*fields, sep=",", file=fp)
|