# Source: bazarr/libs/pysubs2/formats/microdvd.py
# (listing metadata: 145 lines, 5.8 KiB, Python)

from functools import partial
import re
from typing import Optional, TextIO, Any, Match
from ..exceptions import UnknownFPSError
from ..ssaevent import SSAEvent
from ..ssastyle import SSAStyle
from .base import FormatBase
from .substation import parse_tags
from ..time import ms_to_frames, frames_to_ms
from ..ssafile import SSAFile
#: Matches a MicroDVD line of the form ``{start}{end}text``.
#: Group 1 is the start frame and group 2 the end frame (runs of digits; spaces
#: are tolerated before each brace and around the numbers). Group 3 is the
#: non-empty remainder of the line, i.e. the subtitle text with inline tags.
MICRODVD_LINE = re.compile(r" *\{ *(\d+) *\} *\{ *(\d+) *\}(.+)")
class MicroDVDFormat(FormatBase):
    """MicroDVD subtitle format implementation"""

    @classmethod
    def guess_format(cls, text: str) -> Optional[str]:
        """See :meth:`pysubs2.formats.FormatBase.guess_format()`"""
        # One line shaped like ``{start}{end}text`` anywhere in the input is enough.
        if any(map(MICRODVD_LINE.match, text.splitlines())):
            return "microdvd"
        return None

    @classmethod
    def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, fps: Optional[float] = None,
                  strict_fps_inference: bool = True, **kwargs: Any) -> None:
        """
        See :meth:`pysubs2.formats.FormatBase.from_file()`

        Lines which do not look like ``{start}{end}text`` are skipped. Every other
        line is converted to an :class:`SSAEvent` (frames are converted to
        milliseconds using ``fps``) and appended to ``subs``.

        Keyword args:
            strict_fps_inference: If True (default), in the case when ``fps`` is not given, it will be read
                from the first subtitle text only if the start and end frame of this subtitle is ``{1}{1}``
                (matches VLC Player behaviour), otherwise :class:`pysubs2.exceptions.UnknownFPSError` is raised.

                When ``strict_fps_inference``
                is False, framerate will be read from the first subtitle text in this case regardless of
                start and end frame (which may result in bogus result, if the first subtitle is not supposed
                to contain framerate). Before introduction of this option, the library behaved as if this
                option was False.

        Raises:
            UnknownFPSError: ``fps`` was not given and the first subtitle does not
                declare a usable framerate (wrong frame numbers in strict mode, text
                that is not a number, or a number that is not positive and finite).

        .. versionchanged:: 1.7.0
            Added the ``strict_fps_inference`` option.

        """
        def style_replacer(match: Match[str]) -> str:
            # ``{Y:b,i}`` -> ``{\b1\i1}``; only bold/italic/underline are recognised.
            tags = [c for c in "biu" if c in match.group(0)]
            return "{%s}" % "".join(f"\\{c}1" for c in tags)

        def prepare_text(text: str) -> str:
            # Translate MicroDVD markup to SubStation override tags.
            text = text.replace("|", r"\N")  # "|" is the MicroDVD line separator
            text = re.sub(r"\{[Yy]:[^}]+\}", style_replacer, text)
            text = re.sub(r"\{[Ff]:([^}]+)\}", r"{\\fn\1}", text)
            text = re.sub(r"\{[Ss]:([^}]+)\}", r"{\\fs\1}", text)
            text = re.sub(r"\{P:(\d+),(\d+)\}", r"{\\pos(\1,\2)}", text)
            return text.strip()

        for line in fp:
            match = MICRODVD_LINE.match(line)
            if not match:
                continue

            fstart, fend = int(match.group(1)), int(match.group(2))
            text = match.group(3)

            if fps is None:
                # We don't know the framerate, but it is customary to include it as text of the first subtitle,
                # in the format {1}{1}fps, see pysubs2 issue #71 or VLC player source:
                # https://code.videolan.org/videolan/vlc/-/blob/dccda0e133ff0a2e85de727cf19ddbc634f06b67/modules/demux/subtitle.c#L1014
                # In that case, we skip this auxiliary subtitle and proceed with reading.
                inferred: Optional[float] = None
                if not strict_fps_inference or (fstart == 1 and fend == 1):
                    try:
                        inferred = float(text)
                    except ValueError:
                        inferred = None

                # float() also accepts "0", negative numbers, "nan" and "inf"; none of
                # them is a framerate that frame numbers can be converted with.
                if inferred is None or not 0 < inferred < float("inf"):
                    raise UnknownFPSError("Framerate was not specified and "
                                          "cannot be read from "
                                          "the MicroDVD file.")

                fps = inferred
                subs.fps = fps
                continue

            start = frames_to_ms(fstart, fps=fps)
            end = frames_to_ms(fend, fps=fps)
            subs.append(SSAEvent(start=start, end=end, text=prepare_text(text)))

    @classmethod
    def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, fps: Optional[float] = None,
                write_fps_declaration: bool = True, apply_styles: bool = True, **kwargs: Any) -> None:
        """
        See :meth:`pysubs2.formats.FormatBase.to_file()`

        The only supported styling is marking whole lines italic.

        ``subs`` is only read, never modified, even when writing to ``fp`` fails.

        Keyword args:
            write_fps_declaration: If True, write a first line ``{1}{1}<fps>`` which declares
                the framerate (this is the form :meth:`from_file` looks for when no
                ``fps`` is given).
            apply_styles: If False, do not write any styling.

        Raises:
            UnknownFPSError: Neither the ``fps`` argument nor ``subs.fps`` is set.

        """
        if fps is None:
            fps = subs.fps

        if fps is None:
            raise UnknownFPSError("Framerate must be specified when writing MicroDVD.")

        def is_entirely_italic(line: SSAEvent) -> bool:
            # True when every visible (non-whitespace) fragment of the line is italic.
            style = subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE)
            for fragment, sty in parse_tags(line.text, style, subs.styles):
                fragment = fragment.replace(r"\h", " ")
                fragment = fragment.replace(r"\n", "\n")
                fragment = fragment.replace(r"\N", "\n")
                if not sty.italic and fragment and not fragment.isspace():
                    return False
            return True

        # Write the framerate declaration directly instead of routing a temporary
        # event through the loop below: event times are milliseconds, so a
        # "start=1, end=1" event is only written as {1}{1} if 1 ms maps to frame 1,
        # and the italic check could prefix the number with {Y:i}.
        if write_fps_declaration:
            print("{1}{1}%s" % str(fps), file=fp)

        for line in subs.get_text_events():
            text = "|".join(line.plaintext.splitlines())
            if apply_styles and is_entirely_italic(line):
                text = "{Y:i}" + text

            # XXX warn on underflow?
            # Negative times cannot be expressed as frame numbers; clamp to zero.
            start = max(0, ms_to_frames(line.start, fps=fps))
            end = max(0, ms_to_frames(line.end, fps=fps))

            print("{%d}{%d}%s" % (start, end, text), file=fp)