bazarr/libs/fese/tags.py

185 lines
5.0 KiB
Python

from datetime import timedelta
import logging
from babelfish import Language
from babelfish.exceptions import LanguageError
from .exceptions import LanguageNotFound
logger = logging.getLogger(__name__)
LANGUAGE_FALLBACK = None
class FFprobeGenericSubtitleTags:
_DETECTABLE_TAGS = None
def __init__(self, data: dict):
try:
self.language = _get_language(data)
except LanguageNotFound:
if LANGUAGE_FALLBACK is not None:
self.language = Language.fromietf(LANGUAGE_FALLBACK)
else:
raise
self._data = data
@classmethod
def detect_cls_from_data(cls, data):
for cls_ in (FFprobeMkvSubtitleTags, FFprobeMp4SubtitleTags):
if cls_.is_compatible(data):
logger.debug("Detected tags class: %s", cls_)
return cls_(data)
logger.debug("Unable to detect tags class. Using generic")
return FFprobeGenericSubtitleTags(data)
@property
def suffix(self):
lang = self.language.alpha2
if self.language.country is not None:
lang = f"{lang}-{self.language.country}"
return str(lang)
@property
def frames(self):
return 0
@classmethod
def is_compatible(cls, data):
return False
def __str__(self) -> str:
return f"{type(self).__name__}: {self.suffix}"
class FFprobeMkvSubtitleTags(FFprobeGenericSubtitleTags):
_DETECTABLE_TAGS = (
"BPS",
"BPS-eng",
"DURATION",
"DURATION-eng",
"NUMBER_OF_FRAMES",
"NUMBER_OF_FRAMES-eng",
"NUMBER_OF_BYTES",
"NUMBER_OF_BYTES-eng",
)
def __init__(self, data: dict):
super().__init__(data)
self.title = data.get("title")
self.bps = _safe_int(data.get("BPS"))
self.bps_eng = _safe_int(data.get("BPS-eng"))
self.duration = _safe_td(data.get("DURATION"))
self.duration_eng = _safe_td(data.get("DURATION-eng"))
self.number_of_frames = _safe_int(data.get("NUMBER_OF_FRAMES"))
self.number_of_frames_eng = _safe_int(data.get("NUMBER_OF_FRAMES-eng"))
self.number_of_bytes = _safe_int(data.get("NUMBER_OF_BYTES"))
self.number_of_bytes_eng = _safe_int(data.get("NUMBER_OF_BYTES-eng"))
@property
def frames(self):
return self.number_of_frames or self.number_of_frames_eng or 0
@classmethod
def is_compatible(cls, data):
return any(
key
in (
"BPS",
"BPS-eng",
"DURATION",
"DURATION-eng",
"NUMBER_OF_FRAMES",
"NUMBER_OF_FRAMES-eng",
"NUMBER_OF_BYTES",
"NUMBER_OF_BYTES-eng",
)
for key in data.keys()
)
class FFprobeMp4SubtitleTags(FFprobeGenericSubtitleTags):
_DETECTABLE_TAGS = ("creation_time", "handler_name")
def __init__(self, data: dict):
super().__init__(data)
self.creation_time = data.get("creation_time")
self.handler_name = data.get("handler_name")
@classmethod
def is_compatible(cls, data):
return any(key in ("creation_time", "handler_name") for key in data.keys())
def _get_language(tags) -> Language:
og_lang = tags.get("language")
last_exc = None
if og_lang is not None:
if og_lang in _extra_languages:
extra = _extra_languages[og_lang]
title = tags.get("title", "n/a").lower()
if any(possible in title for possible in extra["matches"]):
logger.debug("Found extra language %s", extra["language_args"])
return Language(*extra["language_args"])
try:
lang = Language.fromalpha3b(og_lang)
# Test for suffix
assert lang.alpha2
return lang
except LanguageError as error:
last_exc = error
logger.debug("Error with '%s' language: %s", og_lang, error)
raise LanguageNotFound(f"Couldn't detect language from tags: {tags}") from last_exc
def _safe_td(value, default=None):
if value is None:
return default
try:
h, m, s = [float(ts.replace(",", ".").strip()) for ts in value.split(":")]
return timedelta(hours=h, minutes=m, seconds=s)
except ValueError as error:
logger.warning("Couldn't get duration field: %s. Returning %s", error, default)
return default
def _safe_int(value, default=None):
if value is None:
return default
try:
return int(value)
except ValueError:
logger.warning("Couldn't convert to int: %s. Returning %s", value, default)
return default
_extra_languages = {
"spa": {
"matches": (
"es-la",
"spa-la",
"spl",
"mx",
"latin",
"mexic",
"argent",
"latam",
),
"language_args": ("spa", "MX"),
},
"por": {
"matches": ("pt-br", "pob", "pb", "brazilian", "brasil", "brazil"),
"language_args": ("por", "BR"),
},
}