mirror of
https://github.com/morpheus65535/bazarr
synced 2025-01-03 13:35:18 +00:00
550 lines
19 KiB
Python
550 lines
19 KiB
Python
# coding=utf-8
|
|
from __future__ import absolute_import
|
|
import datetime
|
|
import gzip
|
|
import hashlib
|
|
import os
|
|
import logging
|
|
import traceback
|
|
import types
|
|
import zlib
|
|
|
|
import sys
|
|
|
|
from json_tricks.nonp import loads
|
|
from subzero.lib.json import dumps
|
|
try:
|
|
from os import scandir
|
|
_scandir_generic = scandir
|
|
except ImportError:
|
|
from scandir import scandir, scandir_generic as _scandir_generic
|
|
from .constants import mode_map
|
|
import six
|
|
|
|
# Module-level logger named after this module; used by the storage classes below.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class JSONStoredSubtitle(object):
    """
    Metadata and content of a single stored subtitle.

    All state lives in plain instance attributes so that the instance ``__dict__`` can be
    handed to the JSON serializer as-is (see :meth:`serialize`).
    """
    score = None
    storage_type = None
    hash = None
    provider_name = None
    id = None
    date_added = None
    mode = "a"  # auto/manual/auto-better (a/m/b)
    content = None
    mods = None
    encoding = None
    last_mod = None  # file modification

    def initialize(self, score, storage_type, hash, provider_name, id, date_added=None, mode="a", content=None,
                   mods=None, encoding=None, last_mod=None):
        """
        Populate every attribute of this subtitle.

        :param score: subtitle score; coerced with ``int()``
        :param date_added: defaults to the current local time when falsy
        :param mods: list of modification identifiers; defaults to a new empty list when falsy
        """
        self.score = int(score)
        self.storage_type = storage_type
        self.hash = hash
        self.provider_name = provider_name
        self.id = id
        self.date_added = date_added or datetime.datetime.now()
        self.mode = mode
        self.content = content
        self.mods = mods or []
        self.encoding = encoding
        self.last_mod = last_mod

    def add_mod(self, identifier):
        """
        Append a modification identifier; passing ``None`` clears all stored mods instead.
        """
        if identifier is None:
            self.mods = []
            return

        self.mods = self.mods or []
        self.mods.append(identifier)

    @classmethod
    def get_mode_verbose(cls, mode):
        """
        Return the ``mode_map`` entry for ``mode``, or ``"Unknown"`` when there is none.
        """
        return mode_map.get(mode, "Unknown")

    @property
    def mode_verbose(self):
        """Verbose form of this subtitle's own ``mode``."""
        return self.get_mode_verbose(self.mode)

    def serialize(self):
        """
        Return the instance attributes as a dict.

        A shallow copy is returned so that callers changing the result cannot silently
        alter this object's state.
        """
        return dict(self.__dict__)

    def deserialize(self, data):
        """
        Initialize this subtitle from a dict of :meth:`initialize` keyword arguments.

        The caller's dict is left untouched. A missing ``"content"`` key is treated like
        empty content.
        """
        fields = dict(data)
        content = fields.get("content")

        # legacy: storage was unicode; content is always present in encoded form
        if content and isinstance(content, str):
            # fall back to UTF-8 when the stored record carries no encoding, instead of
            # failing inside str.encode(None)
            fields["content"] = content.encode(fields.get("encoding") or "utf-8")

        self.initialize(**fields)

    @property
    def key(self):
        """``(provider_name, id)`` tuple of this subtitle."""
        return self.provider_name, self.id
|
|
|
|
|
|
class JSONStoredVideoSubtitles(object):
    """
    manages stored subtitles for video_id per media_part/language combination

    ``parts`` is a nested dict: part id -> language -> entries. Within a language, the key
    is either a ``(provider_name, subtitle_id)`` tuple mapping to a JSONStoredSubtitle,
    ``"current"`` mapping to the key tuple of the active subtitle, or ``"blacklist"``
    mapping to a dict keyed by key tuples.
    """
    video_id = None  # rating_key
    title = None
    parts = None
    version = None
    item_type = None  # movie / episode
    added_at = None

    def initialize(self, plex_item, version=None):
        """
        Set up an empty store from ``plex_item``.

        Reads ``rating_key``, ``title``, ``type`` and ``added_at`` (passed to
        ``datetime.fromtimestamp``) from the item.
        """
        self.video_id = str(plex_item.rating_key)

        self.title = plex_item.title
        self.parts = {}
        self.version = version
        self.item_type = plex_item.type
        self.added_at = datetime.datetime.fromtimestamp(plex_item.added_at)

    def deserialize(self, data):
        """
        Restore this store from the structure produced by :meth:`serialize`.

        NOTE: ``"parts"`` is popped from ``data``, so the caller's dict is modified. All
        remaining keys are copied straight onto the instance. Subtitle keys serialized as
        ``"provider__id"`` strings are turned back into tuples.
        """
        parts = data.pop("parts")
        self.parts = {}
        self.__dict__.update(data)

        if parts:
            for part_id, part in six.iteritems(parts):
                self.parts[part_id] = {}
                for language, sub_data in six.iteritems(part):
                    self.parts[part_id][language] = {}

                    for sub_key, subtitle_data in six.iteritems(sub_data):
                        if sub_key == "current":
                            if not isinstance(subtitle_data, tuple):
                                subtitle_data = tuple(subtitle_data.split("__"))
                            self.parts[part_id][language]["current"] = subtitle_data
                        elif sub_key == "blacklist":
                            bl = dict((tuple([str(a) for a in k.split("__")]), v)
                                      for k, v in six.iteritems(subtitle_data))
                            self.parts[part_id][language]["blacklist"] = bl
                        else:
                            sub = JSONStoredSubtitle()

                            sub.initialize(**subtitle_data)
                            if not isinstance(sub_key, tuple):
                                sub_key = tuple(sub_key.split("__"))

                            self.parts[part_id][language][sub_key] = sub

    def serialize(self):
        """
        Build a plain dict of this store in which every tuple key is joined with ``"__"``.

        Subtitles that have content but no encoding are left out, as are subtitles whose
        own ``serialize()`` result is empty.
        """
        data = {"parts": {}}
        for key, value in six.iteritems(self.__dict__):
            if key != "parts":
                data[key] = value

        for part_id, part in six.iteritems(self.parts):
            data["parts"][part_id] = {}
            for language, sub_data in six.iteritems(part):
                data["parts"][part_id][language] = {}

                for sub_key, stored_subtitle in six.iteritems(sub_data):
                    if sub_key == "current":
                        data["parts"][part_id][language]["current"] = "__".join(stored_subtitle)
                    elif sub_key == "blacklist":
                        data["parts"][part_id][language]["blacklist"] = dict(("__".join(k), v) for k, v in
                                                                             six.iteritems(stored_subtitle))
                    else:
                        if stored_subtitle.content and not stored_subtitle.encoding:
                            continue

                        serialized_sub = stored_subtitle.serialize()
                        if serialized_sub:
                            data["parts"][part_id][language]["__".join(sub_key)] = serialized_sub

        return data

    def add(self, part_id, lang, subtitle, storage_type, date_added=None, mode="a", last_mod=None, set_current=True):
        """
        Store ``subtitle`` for ``part_id``/``lang`` and optionally mark it as current.

        Reads ``provider_name``, ``id``, ``score``, ``content`` and ``mods`` from
        ``subtitle``; the stored hash is the MD5 hex digest of ``subtitle.content`` and the
        stored encoding is always ``"utf-8"``.

        :return: always True
        """
        part_id = str(part_id)
        # every reader (get_all, get_any, count, get_blacklist) looks languages up by
        # str(lang); store under the same normalized key so added data can be found again
        lang = str(lang)

        part = self.parts.get(part_id)
        if not part:
            self.parts[part_id] = {}
            part = self.parts[part_id]

        subs = part.get(lang)
        if not subs:
            part[lang] = {}
            subs = part[lang]

        sub_key = self.get_sub_key(subtitle.provider_name, subtitle.id)
        subs[sub_key] = JSONStoredSubtitle()
        subs[sub_key].initialize(subtitle.score, storage_type, hashlib.md5(subtitle.content).hexdigest(),
                                 subtitle.provider_name, subtitle.id, date_added=date_added, mode=mode,
                                 content=subtitle.content, mods=subtitle.mods, encoding="utf-8",
                                 last_mod=last_mod)

        if set_current:
            logger.debug(u"Setting subtitle as current: %r", subtitle)
            subs["current"] = sub_key

        return True

    def get_any(self, part_id, lang):
        """
        Return the subtitle marked as current for ``part_id``/``lang``, or None when the
        part, the language or a current marker is missing.
        """
        subs = self.get_all(part_id, lang)
        if not subs:
            return

        current_key = subs.get("current")
        if current_key:
            return subs.get(current_key)

    def get(self, part_id, lang, sub_key):
        """Return the entry stored under ``sub_key`` for ``part_id``/``lang``, or None."""
        subs = self.get_all(part_id, lang)
        if not subs:
            return

        return subs.get(sub_key)

    def get_all(self, part_id, lang):
        """
        Return the raw entry dict for ``part_id``/``lang`` (including the ``"current"`` and
        ``"blacklist"`` entries), or None when the part or the language is unknown.
        """
        part = self.parts.get(str(part_id))
        if not part:
            return

        return part.get(str(lang))

    def set_current(self, part_id, lang, sub_key):
        """
        Mark the already stored ``sub_key`` as current; does nothing for unknown
        parts/languages and only logs for unknown keys.
        """
        subs = self.get_all(part_id, lang)
        if not subs:
            return

        if sub_key not in subs:
            logger.info("Tried setting unknown subtitle %s as current" % sub_key)
            return

        subs["current"] = sub_key
        logger.debug("Set subtitle %s as current for %s, %s" % (sub_key, part_id, lang))

    def get_by_provider(self, part_id, lang, provider_name):
        """Return a list of all stored subtitles of ``provider_name`` for ``part_id``/``lang``."""
        out = []
        all_subs = self.get_all(part_id, lang)
        if not all_subs:
            return out

        for key, subtitle in six.iteritems(all_subs):
            if key in ("current", "blacklist"):
                continue

            if subtitle.provider_name == provider_name:
                out.append(subtitle)
        return out

    def count(self, part_id, lang):
        """
        Return the number of stored subtitles for ``part_id``/``lang``, not counting the
        ``"current"`` and ``"blacklist"`` bookkeeping entries.

        Unknown parts and unknown languages both count as 0.
        """
        subs = self.get_all(part_id, lang)
        if not subs:
            return 0

        return len([key for key in subs if key not in ("current", "blacklist")])

    def get_sub_key(self, provider_name, id):
        """Return the ``(provider_name, str(id))`` tuple used as storage key."""
        return provider_name, str(id)

    def get_blacklist(self, part_id, lang):
        """
        Return ``(blacklist, subs)`` for ``part_id``/``lang``.

        Both are empty dicts when the part or the language is unknown. ``blacklist`` is a
        fresh empty dict (not yet attached to ``subs``) when nothing was blacklisted so far.
        """
        subs = self.get_all(part_id, lang)
        if not subs:
            return {}, {}

        return subs.get("blacklist", {}), subs

    def blacklist(self, part_id, lang, sub_key, add=True):
        """
        Add ``sub_key`` to, or remove it from (``add=False``), the blacklist of
        ``part_id``/``lang``.

        The recorded blacklist entry carries date_added/score/mode/storage_type of the
        subtitle that is currently marked as current. Nothing happens when there is no
        current subtitle.
        """
        current_bl, subs = self.get_blacklist(part_id, lang)

        # "current" may be missing entirely (nothing stored, or nothing selected yet)
        current_key = subs.get("current")
        sub = subs.get(current_key) if current_key else None
        if not sub:
            return

        if not add:
            # removal request: only ever removes, never creates an entry
            if sub_key in current_bl:
                del current_bl[sub_key]
                subs["blacklist"] = current_bl
            return

        if sub_key in current_bl:
            return

        current_bl[sub_key] = {"date_added": sub.date_added, "score": sub.score, "mode": sub.mode,
                               "storage_type": sub.storage_type}
        subs["blacklist"] = current_bl

    def __repr__(self):
        return six.text_type(self)

    def __unicode__(self):
        return u"%s (%s)" % (self.title, self.video_id)

    def __str__(self):
        return str(self.video_id)
|
|
|
|
|
|
class StoredSubtitlesManager(object):
    """
    manages the storage and retrieval of StoredVideoSubtitles instances for a given video_id

    Every video is persisted as one compressed JSON file named ``subs_<video_id>.json.gz``
    inside :attr:`dataitems_path`.
    """
    storage = None
    version = 3
    extension = ".json.gz"

    def __init__(self, storage, threadkit, plexapi_item_getter):
        """
        :param storage: storage object; used to locate the data path and for legacy deletes
        :param threadkit: provides ``Lock(key=...)`` context managers guarding file access
        :param plexapi_item_getter: callable returning the item for a video id (falsy if gone)
        """
        self.storage = storage
        self.get_item = plexapi_item_getter
        self.threadkit = threadkit

    def destroy(self):
        """Drop the references handed over in ``__init__``."""
        self.storage = None
        self.get_item = None
        self.threadkit = None

    def get_storage_filename(self, video_id):
        """Return the bare (extension-less) storage file name for ``video_id``."""
        return "subs_%s" % video_id

    @property
    def dataitems_path(self):
        """The ``DataItems`` directory below the storage core's data path."""
        return os.path.join(getattr(self.storage, "_core").storage.data_path, "DataItems")

    def get_json_data_path(self, bare_fn):
        """Return the full path for ``bare_fn``, appending :attr:`extension` when missing."""
        if not bare_fn.endswith(self.extension):
            return os.path.join(self.dataitems_path, "%s%s" % (bare_fn, self.extension))
        return os.path.join(self.dataitems_path, bare_fn)

    def get_all_files(self, scandir_generic=False):
        """
        Yield the names of all regular files in :attr:`dataitems_path` that start with
        ``subs_`` and end with :attr:`extension`.

        :param scandir_generic: use the generic scandir implementation instead of the default
        """
        _scandir = _scandir_generic if scandir_generic else scandir
        for entry in _scandir(self.dataitems_path):
            if entry.is_file(follow_symlinks=False) and \
                    entry.name.startswith("subs_") and \
                    entry.name.endswith(self.extension):
                yield entry.name

    def get_recent_files(self, age_days=30):
        """
        Return the names of storage files whose ctime is newer than ``age_days`` days.

        When scanning raises OSError, the scan is repeated once with the generic scandir
        implementation. Each attempt collects into its own list, so files seen before the
        failure are not reported twice.
        """
        root = self.dataitems_path
        recent_dt = datetime.datetime.now() - datetime.timedelta(days=age_days)

        def run(scandir_generic=False):
            found = []
            for fn in self.get_all_files(scandir_generic=scandir_generic):
                ctime = os.path.getctime(os.path.join(root, fn))
                created = datetime.datetime.fromtimestamp(ctime)
                if created > recent_dt:
                    found.append(fn)
            return found

        try:
            return run()
        except OSError:
            return run(scandir_generic=True)

    def load_recent_files(self, age_days=30):
        """Load every recent storage file; returns ``{filename: loaded data}`` for successful loads."""
        out = {}
        for fn in self.get_recent_files(age_days=age_days):
            data = self.load(filename=fn)
            if data:
                out[fn] = data
        return out

    def delete_missing(self, wanted_languages=None, scandir_generic=False):
        """
        Clean up stored data that no longer matches the library.

        Storage files are deleted when their item is gone or has no ``media`` attribute.
        For remaining items, data of parts that no longer exist and of languages not in
        ``wanted_languages`` is removed and the file is saved again.

        :param wanted_languages: iterable of language keys to keep; None/empty keeps none
        :return: list of video ids whose storage file was deleted
        """
        if wanted_languages is None:
            wanted_languages = ()
        deleted = []

        def delete_fn(filename):
            if filename.endswith(self.extension):
                self.delete(self.get_json_data_path(filename))
            else:
                self.legacy_delete(filename)

        for fn in self.get_all_files(scandir_generic=scandir_generic):
            video_id = os.path.basename(fn).split(".")[0].split("subs_")[1]
            item = self.get_item(video_id)

            # item missing, or wrong (legacy) info: delete storage
            if not item or not hasattr(item, "media"):
                delete_fn(fn)
                deleted.append(video_id)
                continue

            known_parts = set()
            for media in item.media:
                for part in media.parts:
                    known_parts.add(str(part.id))

            stored_subs = self.load(filename=fn)
            if not stored_subs:
                continue

            changed_any = False

            # remove known info about deleted parts
            missing_parts = set(stored_subs.parts).difference(known_parts)
            if missing_parts:
                logger.debug("Parts removed: %s:%s, removing data", video_id, missing_parts)
                for missing_part in missing_parts:
                    if missing_part in stored_subs.parts:
                        del stored_subs.parts[missing_part]
                        changed_any = True

            # remove known info about non-existing languages
            for part_id, part in six.iteritems(stored_subs.parts):
                missing_languages = set(part).difference(wanted_languages)
                if missing_languages:
                    logger.debug("Languages removed: %s:%s:%s, removing data", video_id, part_id, missing_languages)
                    for missing_language in missing_languages:
                        if missing_language in part:
                            del part[missing_language]
                            changed_any = True

            if changed_any:
                self.save(stored_subs)

            # release the loaded data before the next file is read
            stored_subs = None

        return deleted

    def migrate_v2(self, subs_for_video):
        """
        Migrate to version 2 by filling ``item_type`` and ``added_at`` from the item.

        :return: False when the item cannot be found, True otherwise
        """
        plex_item = self.get_item(subs_for_video.video_id)
        if not plex_item:
            return False
        subs_for_video.item_type = plex_item.type
        subs_for_video.added_at = datetime.datetime.fromtimestamp(plex_item.added_at)
        subs_for_video.version = 2
        return True

    def migrate_v3(self, subs_for_video):
        """Migrate to version 3; only the version number changes."""
        subs_for_video.version = 3
        return True

    def load(self, video_id=None, filename=None):
        """
        Load the stored subtitles for ``video_id`` (or for the storage file ``filename``).

        On win32 the file is first read as raw zlib data, falling back to gzip. Data older
        than :attr:`version` is migrated through the ``migrate_v<N>`` methods and saved
        again on success; a migration returning False deletes the storage file.

        :return: JSONStoredVideoSubtitles, or None when the file is missing or unreadable
        """
        subs_for_video = None
        bare_fn = self.get_storage_filename(video_id) if video_id else filename
        json_path = self.get_json_data_path(bare_fn)
        basename = os.path.basename(json_path)

        if os.path.exists(json_path):
            # new style data
            subs_for_video = JSONStoredVideoSubtitles()
            try:
                with self.threadkit.Lock(key="sub_storage_%s" % basename):
                    if sys.platform == "win32":
                        try:
                            with open(json_path, 'rb') as f:
                                s = zlib.decompress(f.read())
                        except zlib.error:
                            # fallback to old gzip win32 implementation
                            with gzip.open(json_path, 'rb', compresslevel=6) as f:
                                s = f.read()
                    else:
                        with gzip.open(json_path, 'rb', compresslevel=6) as f:
                            s = f.read()

                data = loads(s)
            except Exception:
                logger.error("Couldn't load JSON data for %s: %s", bare_fn, traceback.format_exc())
                return

            subs_for_video.deserialize(data)
            data = None

        if not subs_for_video:
            return

        # apply possible migrations
        cur_ver = old_ver = subs_for_video.version

        if cur_ver < self.version:
            success = False
            while cur_ver < self.version:
                cur_ver += 1
                mig_func = "migrate_v%s" % cur_ver
                if hasattr(self, mig_func):
                    logger.info("Migrating subtitle storage for %s %s>%s", subs_for_video.video_id, old_ver, cur_ver)
                    success = getattr(self, mig_func)(subs_for_video)
                    if success is False:
                        logger.error("Couldn't migrate %s, removing data", subs_for_video.video_id)
                        self.delete(json_path)
                        break

            if cur_ver > old_ver and success:
                logger.info("Storing migrated subtitle storage for %s", subs_for_video.video_id)
                self.save(subs_for_video)
            elif not success:
                logger.info("Migration of %s %s>%s failed", subs_for_video.video_id, old_ver, cur_ver)

        return subs_for_video

    def new(self, plex_item):
        """Return a new, empty store for ``plex_item`` at the current :attr:`version`."""
        subs_for_video = JSONStoredVideoSubtitles()
        subs_for_video.initialize(plex_item, version=self.version)
        return subs_for_video

    def load_or_new(self, plex_item, save=False):
        """
        Load the store for ``plex_item``, creating a new one when nothing could be loaded.

        :param save: also persist a newly created store
        """
        subs_for_video = self.load(plex_item.rating_key)
        if not subs_for_video:
            logger.info("Creating new subtitle storage for: %s", plex_item.rating_key)
            subs_for_video = self.new(plex_item)
            if save:
                self.save(subs_for_video)
        return subs_for_video

    def save(self, subs_for_video):
        """
        Persist ``subs_for_video`` to its storage file.

        The data is written to a ``*_tmp`` file first (raw zlib on win32, gzip elsewhere)
        and only moved over the real file after the write succeeded, so a failed write
        does not destroy the previously stored data.
        """
        data = subs_for_video.serialize()
        temp_fn = self.get_json_data_path(self.get_storage_filename(subs_for_video.video_id) + "_tmp")
        fn = self.get_json_data_path(self.get_storage_filename(subs_for_video.video_id))
        basename = os.path.basename(fn)

        json_data = dumps(data, ensure_ascii=False)
        # zlib.compress and binary gzip streams only accept bytes
        # NOTE(review): UTF-8 is assumed to be what loads() expects on read - confirm
        if not isinstance(json_data, bytes):
            json_data = json_data.encode("utf-8")

        # os.replace overwrites an existing destination on every platform; os.rename does
        # not on Windows. Fall back to os.rename where os.replace is unavailable.
        replace = getattr(os, "replace", os.rename)

        with self.threadkit.Lock(key="sub_storage_%s" % basename):
            written = False
            if sys.platform == "win32":
                try:
                    f = open(temp_fn, "w+b")

                    try:
                        f.seek(0, os.SEEK_CUR)
                        f.write(zlib.compress(json_data, 6))
                        f.flush()
                        written = True
                    except Exception:
                        logger.error("Something went wrong when writing to: %s: %s", basename, traceback.format_exc())
                    finally:
                        f.close()
                except Exception:
                    logger.error("Something went REALLY wrong when writing to: %s: %s", basename,
                                 traceback.format_exc())
            else:
                with gzip.open(temp_fn, "wb", compresslevel=6) as f:
                    f.write(json_data)
                written = True

            if written:
                replace(temp_fn, fn)
            else:
                # best effort: don't leave a truncated temp file behind
                try:
                    os.remove(temp_fn)
                except OSError:
                    pass

    def delete(self, filename):
        """Remove the file at path ``filename``; errors from ``os.remove`` propagate."""
        os.remove(filename)

    def legacy_delete(self, filename):
        """Remove ``filename`` through the storage object's ``Remove``; failures are only logged."""
        try:
            self.storage.Remove(filename)
        except Exception:
            logger.error("Failed to delete item %s: %s" % (filename, traceback.format_exc()))
|