2019-09-28 04:22:17 +00:00
|
|
|
import re
|
2024-03-03 17:15:23 +00:00
|
|
|
import warnings
|
2024-06-08 10:40:00 +00:00
|
|
|
from typing import Optional, TextIO, Any
|
2024-03-03 17:15:23 +00:00
|
|
|
|
2024-06-08 10:40:00 +00:00
|
|
|
from .base import FormatBase
|
|
|
|
from ..ssaevent import SSAEvent
|
|
|
|
from ..ssastyle import SSAStyle
|
2019-09-28 04:22:17 +00:00
|
|
|
from .substation import parse_tags
|
2024-06-08 10:40:00 +00:00
|
|
|
from ..time import ms_to_times, make_time, TIMESTAMP_SHORT, timestamp_to_ms
|
|
|
|
from ..ssafile import SSAFile
|
|
|
|
|
2019-09-28 04:22:17 +00:00
|
|
|
|
|
|
|
#: Regular expression for a single TMP cue line: a ``H:MM:SS`` or ``HH:MM:SS``
#: timestamp (group 1), a separating colon, then the non-empty cue text (group 2).
TMP_LINE = re.compile(
    r"(\d{1,2}:\d{2}:\d{2}):(.+)"
)
|
|
|
|
|
|
|
|
#: Upper bound for timestamps that TMP can express, i.e. 99:59:59,
#: as computed by ``make_time``.
MAX_REPRESENTABLE_TIME = make_time(
    h=99,
    m=59,
    s=59,
)
|
2019-09-28 04:22:17 +00:00
|
|
|
|
2021-07-14 23:13:28 +00:00
|
|
|
|
2019-09-28 04:22:17 +00:00
|
|
|
class TmpFormat(FormatBase):
    """TMP subtitle format implementation

    A TMP file holds one cue per physical line in the form
    ``HH:MM:SS:text``. Only a start time is stored; ``|`` inside the text
    stands for a line break within the cue.
    """

    @staticmethod
    def ms_to_timestamp(ms: int) -> str:
        """Convert ms to 'HH:MM:SS'

        Negative input is clamped to zero. Input larger than
        ``MAX_REPRESENTABLE_TIME`` is clamped to that value and a
        ``RuntimeWarning`` is issued. The fourth component returned by
        ``ms_to_times`` (presumably the sub-second remainder) is discarded
        because the format only stores whole seconds.
        """
        if ms < 0:
            ms = 0
        if ms > MAX_REPRESENTABLE_TIME:
            warnings.warn("Overflow in TMP timestamp, clamping to MAX_REPRESENTABLE_TIME", RuntimeWarning)
            ms = MAX_REPRESENTABLE_TIME
        h, m, s, _ = ms_to_times(ms)
        return f"{h:02d}:{m:02d}:{s:02d}"

    @classmethod
    def guess_format(cls, text: str) -> Optional[str]:
        """See :meth:`pysubs2.formats.FormatBase.guess_format()`

        Returns ``"tmp"`` as soon as any line of ``text`` matches
        ``TMP_LINE``, and ``None`` otherwise or when the text carries
        SSA/ASS section headers.
        """
        if "[Script Info]" in text or "[V4+ Styles]" in text:
            # disambiguation vs. SSA/ASS
            return None

        # A successful match already spans the rest of the line (the text
        # group is a greedy ``.+``), so one match per line is the only
        # possible outcome and no further counting is needed.
        if any(TMP_LINE.match(line) for line in text.splitlines()):
            return "tmp"

        return None

    @classmethod
    def from_file(cls, subs: "SSAFile", fp: TextIO, format_: str, **kwargs: Any) -> None:
        """See :meth:`pysubs2.formats.FormatBase.from_file()`

        Lines that do not match ``TMP_LINE`` are silently skipped. Because
        TMP stores no end time, each event's end is estimated from the line
        length and then trimmed so that it never runs past the start of the
        following event. The resulting list replaces ``subs.events``.
        """
        events = []

        def prepare_text(text: str) -> str:
            text = text.replace("|", r"\N")  # convert newlines
            text = re.sub(r"< *u *>", r"{\\u1}", text)  # opening underline tag -> override tag
            text = re.sub(r"< */? *[a-zA-Z][^>]*>", "", text)  # strip other HTML tags
            return text

        for line in fp:
            match = TMP_LINE.match(line)
            if not match:
                continue

            timestamp, text = match.groups()
            match2 = TIMESTAMP_SHORT.match(timestamp)
            # Not input validation: TMP_LINE has already accepted the
            # timestamp, the assert only narrows the Optional for type checkers.
            assert match2 is not None, "TMP_LINE contains TIMESTAMP_SHORT"
            start = timestamp_to_ms(match2.groups())

            # Unfortunately, end timestamp is not given; try to estimate something reasonable:
            # start + 500 ms + 67 ms/character (15 chars per second)
            # NOTE: the length used is that of the whole raw line as read
            # from ``fp`` (timestamp prefix included), not only the cue text.
            end_guess = start + 500 + (len(line) * 67)

            event = SSAEvent(start=start, end=end_guess, text=prepare_text(text))
            events.append(event)

        # correct any overlapping subtitles created by end_guess
        for current, following in zip(events, events[1:]):
            current.end = min(current.end, following.start)

        subs.events = events

    @classmethod
    def to_file(cls, subs: "SSAFile", fp: TextIO, format_: str, apply_styles: bool = True, **kwargs: Any) -> None:
        """
        See :meth:`pysubs2.formats.FormatBase.to_file()`

        Italic, underline and strikeout styling is supported.

        Each text event is written as a single ``HH:MM:SS:text`` line. Line
        breaks inside an event are written as ``|``, the same separator
        that :meth:`from_file` converts back into a line break.

        Keyword args:
            apply_styles: If False, do not write any styling.

        """
        def prepare_text(text: str, style: SSAStyle) -> str:
            body = []
            for fragment, sty in parse_tags(text, style, subs.styles):
                fragment = fragment.replace(r"\h", " ")
                fragment = fragment.replace(r"\n", "\n")
                fragment = fragment.replace(r"\N", "\n")
                if apply_styles:
                    if sty.italic:
                        fragment = f"<i>{fragment}</i>"
                    if sty.underline:
                        fragment = f"<u>{fragment}</u>"
                    if sty.strikeout:
                        fragment = f"<s>{fragment}</s>"
                body.append(fragment)

            # A cue must stay on one physical line: a raw newline would
            # produce a continuation line without a timestamp, which
            # from_file() skips (or misreads as a new cue). Runs of line
            # breaks are therefore collapsed into a single "|".
            return re.sub("\n+", "|", "".join(body).strip())

        for line in subs.get_text_events():
            start = cls.ms_to_timestamp(line.start)
            text = prepare_text(line.text, subs.styles.get(line.style, SSAStyle.DEFAULT_STYLE))

            print(start + ":" + text, file=fp)
|