Add support for configurable scores (movies and episodes)

Currently only configurable by manually editing
`data/config/config.ini`.

New configurable values are `series_scores` and `movie_scores`.

For each config section, the sum of the config values (except hash)
must be equal to the hash value plus one (1). If any section fails
this check, the default values are used for every section (reported
via debug log).

Hash values are not meant to be modified; they are shown in
`config.ini` for reference only. Modifying a hash value would
break Bazarr's score logic.
This commit is contained in:
Vitiko 2022-11-05 01:01:37 -04:00
parent 0b8274ec3e
commit 708fbfcd8e
10 changed files with 245 additions and 106 deletions

View File

@ -238,27 +238,26 @@ defaults = {
"year": 90,
"season": 30,
"episode": 30,
"release_group": 15,
"release_group": 14,
"source": 7,
"audio_codec": 3,
"resolution": 2,
"video_codec": 2,
"streaming_service": 1,
"hearing_impaired": 1,
"streaming_service": 0,
"edition": 0,
},
'movie_scores': {
"hash": 119,
"title": 60,
"year": 30,
"release_group": 15,
"release_group": 13,
"source": 7,
"audio_codec": 3,
"resolution": 2,
"video_codec": 2,
"streaming_service": 1,
"edition": 1,
"hearing_impaired": 1,
"streaming_service": 0,
"edition": 0,
}
}
@ -611,3 +610,8 @@ def configure_proxy_func():
os.environ['HTTPS_PROXY'] = str(proxy)
exclude = ','.join(get_array_from(settings.proxy.exclude))
os.environ['NO_PROXY'] = exclude
def get_scores():
    """Return the configured score tables keyed by video type.

    The result maps ``"movie"`` to the ``movie_scores`` section and
    ``"episode"`` to the ``series_scores`` section of ``get_settings()``.
    """
    # Use a distinct local name so the module-level ``settings`` object is
    # not shadowed inside this function.
    current = get_settings()
    return {
        "movie": current["movie_scores"],
        "episode": current["series_scores"],
    }

View File

@ -9,12 +9,11 @@ import subliminal
from subzero.language import Language
from subliminal_patch.core import save_subtitles
from subliminal_patch.core_persistent import download_best_subtitles
from subliminal_patch.score import compute_score
from subliminal_patch.score import ComputeScore
from app.config import settings, get_array_from
from app.config import settings, get_array_from, get_scores
from utilities.helper import get_target_folder, force_unicode
from languages.get_languages import alpha3_from_alpha2
from subtitles.tools.score import movie_score, series_score
from .pool import update_pools, _get_pool
from .utils import get_video, _get_lang_obj, _get_scores, _set_forced_providers
@ -46,7 +45,6 @@ def generate_subtitles(path, languages, audio_language, sceneName, title, media_
video = get_video(force_unicode(path), title, sceneName, providers=providers, media_type=media_type)
if video:
handler = series_score if media_type == "series" else movie_score
minimum_score = settings.general.minimum_score
minimum_score_movie = settings.general.minimum_score_movie
min_score, max_score, scores = _get_scores(media_type, minimum_score_movie, minimum_score)
@ -59,9 +57,7 @@ def generate_subtitles(path, languages, audio_language, sceneName, title, media_
pool_instance=pool,
min_score=int(min_score),
hearing_impaired=hi_required,
compute_score=compute_score,
throttle_time=None, # fixme
score_obj=handler)
compute_score=ComputeScore(get_scores()))
else:
downloaded_subtitles = None
logging.info("BAZARR All providers are throttled")

View File

@ -11,13 +11,12 @@ import subliminal
from subzero.language import Language
from subliminal_patch.core import save_subtitles
from subliminal_patch.core_persistent import list_all_subtitles, download_subtitles
from subliminal_patch.score import compute_score
from subliminal_patch.score import ComputeScore
from languages.get_languages import alpha3_from_alpha2
from app.config import settings, get_array_from
from app.config import get_scores, settings, get_array_from, get_settings
from utilities.helper import get_target_folder, force_unicode
from app.database import get_profiles_list
from subtitles.tools.score import movie_score, series_score
from .pool import update_pools, _get_pool, _init_pool
from .utils import get_video, _get_lang_obj, _get_scores, _set_forced_providers
@ -35,6 +34,7 @@ def manual_search(path, profile_id, providers, sceneName, title, media_type):
language_set, initial_language_set, original_format = _get_language_obj(profile_id=profile_id)
also_forced = any([x.forced for x in initial_language_set])
forced_required = all([x.forced for x in initial_language_set])
compute_score = ComputeScore(get_scores())
_set_forced_providers(pool=pool, also_forced=also_forced, forced_required=forced_required)
if providers:
@ -43,8 +43,6 @@ def manual_search(path, profile_id, providers, sceneName, title, media_type):
logging.info("BAZARR All providers are throttled")
return None
if video:
handler = series_score if media_type == "series" else movie_score
try:
if providers:
subtitles = list_all_subtitles([video], language_set, pool)
@ -101,8 +99,7 @@ def manual_search(path, profile_id, providers, sceneName, title, media_type):
initial_hi = None
_, max_score, scores = _get_scores(media_type, minimum_score_movie, minimum_score)
score, score_without_hash = compute_score(matches, s, video, hearing_impaired=initial_hi,
score_obj=handler)
score, score_without_hash = compute_score(matches, s, video, hearing_impaired=initial_hi)
if 'hash' not in matches:
not_matched = scores - matches
s.score = score_without_hash

View File

@ -27,8 +27,8 @@ from concurrent.futures import as_completed
from .extensions import provider_registry
from .exceptions import MustGetBlacklisted
from .score import compute_score as default_compute_score
from subliminal.exceptions import ServiceUnavailable, DownloadLimitExceeded
from subliminal.score import compute_score as default_compute_score
from subliminal.utils import hash_napiprojekt, hash_opensubtitles, hash_shooter, hash_thesubdb
from subliminal.video import VIDEO_EXTENSIONS, Video, Episode, Movie
from subliminal.core import guessit, ProviderPool, io, is_windows_special_path, \
@ -440,7 +440,7 @@ class SZProviderPool(ProviderPool):
return True
def download_best_subtitles(self, subtitles, video, languages, min_score=0, hearing_impaired=False, only_one=False,
compute_score=None, score_obj=None):
compute_score=None):
"""Download the best matching subtitles.
patch:
@ -486,8 +486,7 @@ class SZProviderPool(ProviderPool):
orig_matches = matches.copy()
logger.debug('%r: Found matches %r', s, matches)
score, score_without_hash = compute_score(matches, s, video, hearing_impaired=use_hearing_impaired,
score_obj=score_obj)
score, score_without_hash = compute_score(matches, s, video, use_hearing_impaired)
unsorted_subtitles.append(
(s, score, score_without_hash, matches, orig_matches))
@ -1005,7 +1004,7 @@ def download_subtitles(subtitles, pool_class=ProviderPool, **kwargs):
def download_best_subtitles(videos, languages, min_score=0, hearing_impaired=False, only_one=False, compute_score=None,
pool_class=ProviderPool, throttle_time=0, score_obj=None, **kwargs):
pool_class=ProviderPool, throttle_time=0, **kwargs):
"""List and download the best matching subtitles.
The `videos` must pass the `languages` and `undefined` (`only_one`) checks of :func:`check_video`.
@ -1049,7 +1048,7 @@ def download_best_subtitles(videos, languages, min_score=0, hearing_impaired=Fal
subtitles = pool.download_best_subtitles(pool.list_subtitles(video, languages - video.subtitle_languages),
video, languages, min_score=min_score,
hearing_impaired=hearing_impaired, only_one=only_one,
compute_score=compute_score, score_obj=score_obj)
compute_score=compute_score)
logger.info('Downloaded %d subtitle(s)', len(subtitles))
downloaded_subtitles[video].extend(subtitles)

View File

@ -50,8 +50,7 @@ def download_best_subtitles(
hearing_impaired=False,
only_one=False,
compute_score=None,
throttle_time=0,
score_obj=None,
**kwargs
):
downloaded_subtitles = defaultdict(list)
@ -67,8 +66,6 @@ def download_best_subtitles(
if not checked_videos:
return downloaded_subtitles
got_multiple = len(checked_videos) > 1
# download best subtitles
for video in checked_videos:
logger.info("Downloading best subtitles for %r", video)
@ -80,13 +77,8 @@ def download_best_subtitles(
hearing_impaired=hearing_impaired,
only_one=only_one,
compute_score=compute_score,
score_obj=score_obj,
)
logger.info("Downloaded %d subtitle(s)", len(subtitles))
downloaded_subtitles[video].extend(subtitles)
if got_multiple and throttle_time:
logger.debug("Waiting %ss before continuing ...", throttle_time)
time.sleep(throttle_time)
return downloaded_subtitles

View File

@ -28,85 +28,142 @@ def framerate_equal(source, check):
return False
def compute_score(matches, subtitle, video, hearing_impaired=None, score_obj=None):
"""Compute the score of the `subtitle` against the `video` with `hearing_impaired` preference.
# Built-in score tables, keyed by video type ("episode" / "movie"). Each table
# maps a match name to its weight. ComputeScore falls back to these when no
# custom scores are supplied or when the custom scores fail validation.
# In both tables the non-hash weights add up to the hash weight plus one
# (episode: 360 vs 359, movie: 120 vs 119).
DEFAULT_SCORES = {
# Weights applied to episode videos.
"episode": {
"hash": 359,
"series": 180,
"year": 90,
"season": 30,
"episode": 30,
"release_group": 14,
"source": 7,
"audio_codec": 3,
"resolution": 2,
"video_codec": 2,
"streaming_service": 1,
"hearing_impaired": 1,
},
# Weights applied to movie videos.
"movie": {
"hash": 119,
"title": 60,
"year": 30,
"release_group": 13,
"source": 7,
"audio_codec": 3,
"resolution": 2,
"video_codec": 2,
"streaming_service": 1,
"edition": 1,
"hearing_impaired": 1,
},
}
patch:
- remove upper bounds of score
- re-add matches argument and remove get_matches from here
:func:`compute_score` uses the :meth:`Subtitle.get_matches <subliminal.subtitle.Subtitle.get_matches>` method and
applies the scores (either from :data:`episode_scores` or :data:`movie_scores`) after some processing.
def _check_hash_sum(scores: dict) -> bool:
    """Tell whether a score table is internally consistent.

    A table is consistent when the sum of every value except ``"hash"``
    equals the ``"hash"`` value plus one.

    :param scores: mapping of match name to score, expected to contain a
        ``"hash"`` entry.
    :return: ``True`` if the table is consistent, ``False`` otherwise. A
        table without a ``"hash"`` entry is reported as inconsistent instead
        of raising ``KeyError``, so callers can fall back to default scores.
    """
    hash_val = scores.get("hash")
    if hash_val is None:
        logger.debug("Scores have no hash value: %s", scores)
        return False

    rest_sum = sum(val for key, val in scores.items() if key != "hash")
    logger.debug("Hash value: %s -> Rest sum: %s", hash_val, rest_sum)
    return rest_sum - 1 == hash_val
:param subtitle: the subtitle to compute the score of.
:type subtitle: :class:`~subliminal.subtitle.Subtitle`
:param video: the video to compute the score against.
:type video: :class:`~subliminal.video.Video`
:param bool hearing_impaired: hearing impaired preference.
:return: score of the subtitle.
:rtype: int
"""
logger.info('%r: Computing score for video %r with %r', subtitle, video, dict(hearing_impaired=hearing_impaired))
class ComputeScore:
def __init__(self, scores=None):
if scores:
valid = True
for val in scores.values():
if not _check_hash_sum(val):
logger.debug("Scores sum - 1 is not equal to hash. Using defaults")
self._scores = DEFAULT_SCORES
valid = False
break
if score_obj is not None:
scores = score_obj.scores
score_obj.check_custom_profiles(subtitle, matches)
else:
scores = get_scores(video)
if valid is True:
self._scores = scores
else:
self._scores = DEFAULT_SCORES
is_episode = isinstance(video, Episode)
is_movie = isinstance(video, Movie)
# Hash values should be the same. Update from defaults to ensure it
for key in self._scores.keys():
self._scores[key]["hash"] = DEFAULT_SCORES[key]["hash"]
episode_hash_valid_if = {"series", "season", "episode", "source"}
movie_hash_valid_if = {"video_codec", "source"}
def __call__(self, matches, subtitle, video, hearing_impaired=None):
scores = self._scores[video.__class__.__name__.lower()]
logger.debug("Scores to use for %s: %s", video, scores)
orig_matches = matches.copy()
is_episode = isinstance(video, Episode)
is_movie = isinstance(video, Movie)
# on hash match, discard everything else
if subtitle.hash_verifiable and 'hash' in matches:
# hash is error-prone, try to fix that
hash_valid_if = episode_hash_valid_if if is_episode else movie_hash_valid_if
episode_hash_valid_if = {"series", "season", "episode", "source"}
movie_hash_valid_if = {"video_codec", "source"}
# don't validate hashes of specials, as season and episode tend to be wrong
if is_movie or not video.is_special:
if hash_valid_if <= set(matches):
# series, season and episode matched, hash is valid
logger.debug('%r: Using valid hash, as %s are correct (%r) and (%r)', subtitle, hash_valid_if, matches,
video)
matches &= {'hash'}
else:
# no match, invalidate hash
logger.debug('%r: Ignoring hash as other matches are wrong (missing: %r) and (%r)', subtitle,
hash_valid_if - matches, video)
matches -= {"hash"}
elif 'hash' in matches:
logger.debug('%r: Hash not verifiable for this provider. Keeping it', subtitle)
matches &= {'hash'}
orig_matches = matches.copy()
# handle equivalent matches
eq_matches = set()
if is_episode:
_episode_checks(video, eq_matches, matches)
elif is_movie and 'imdb_id' in matches:
logger.debug('Adding imdb_id match equivalents')
eq_matches |= {'title', 'year'}
# on hash match, discard everything else
if subtitle.hash_verifiable and "hash" in matches:
# hash is error-prone, try to fix that
hash_valid_if = episode_hash_valid_if if is_episode else movie_hash_valid_if
matches |= eq_matches
# don't validate hashes of specials, as season and episode tend to be wrong
if is_movie or not video.is_special:
if hash_valid_if <= set(matches):
# series, season and episode matched, hash is valid
logger.debug(
"%r: Using valid hash, as %s are correct (%r) and (%r)",
subtitle,
hash_valid_if,
matches,
video,
)
matches &= {"hash"}
else:
# no match, invalidate hash
logger.debug(
"%r: Ignoring hash as other matches are wrong (missing: %r) and (%r)",
subtitle,
hash_valid_if - matches,
video,
)
matches -= {"hash"}
elif "hash" in matches:
logger.debug(
"%r: Hash not verifiable for this provider. Keeping it", subtitle
)
matches &= {"hash"}
# handle hearing impaired
if hearing_impaired is not None and subtitle.hearing_impaired == hearing_impaired:
logger.debug('Matched hearing_impaired')
matches.add('hearing_impaired')
orig_matches.add('hearing_impaired')
# handle equivalent matches
eq_matches = set()
if is_episode:
_episode_checks(video, eq_matches, matches)
elif is_movie and "imdb_id" in matches:
logger.debug("Adding imdb_id match equivalents")
eq_matches |= {"title", "year"}
# compute the score
score = sum((scores.get(match, 0) for match in matches))
logger.info('%r: Computed score %r with final matches %r', subtitle, score, matches)
matches |= eq_matches
score_without_hash = sum((scores.get(match, 0) for match in orig_matches | eq_matches if match != "hash"))
# handle hearing impaired
if (
hearing_impaired is not None
and subtitle.hearing_impaired == hearing_impaired
):
logger.debug("Matched hearing_impaired")
matches.add("hearing_impaired")
orig_matches.add("hearing_impaired")
return score, score_without_hash
# compute the score
score = sum((scores.get(match, 0) for match in matches))
logger.info(
"%r: Computed score %r with final matches %r", subtitle, score, matches
)
score_without_hash = sum(
(
scores.get(match, 0)
for match in orig_matches | eq_matches
if match != "hash"
)
)
return score, score_without_hash
def _episode_checks(video, eq_matches, matches):
@ -127,6 +184,14 @@ def _episode_checks(video, eq_matches, matches):
eq_matches |= {"series", "year"}
# specials
if video.is_special and "title" in matches and "series" in matches and "year" in matches:
if (
video.is_special
and "title" in matches
and "series" in matches
and "year" in matches
):
logger.debug("Adding special title match equivalent")
eq_matches |= {"season", "episode"}
compute_score = ComputeScore()

View File

@ -548,12 +548,23 @@ def guess_matches(video, guess, partial=False):
if _has_match(video, guess, key):
matches.add(key)
# Add streaming service match for non-web sources
if video.source and video.source != "Web":
matches.add("streaming_service")
# As edition tags are rare, add edition match if the video doesn't have an edition
if not video.edition:
matches.add("edition")
for key in ("streaming_service", "edition"):
if _check_optional(video, guess, key):
matches.add(key)
return matches
def _check_optional(video, guess, key="edition"):
    """Decide whether an optional property counts as a match.

    The value is read from ``guess`` (via ``guess.get(key)``) and from
    ``video`` (via ``getattr(video, key, None)``):

    - both present: defer to ``_has_match(video, guess, key)``;
    - both absent: treated as a match (returns ``True``);
    - only one present: not a match (returns ``False``).
    """
    from_guess = guess.get(key)
    from_video = getattr(video, key, None)

    # Exactly one side carries the property: it cannot be a match.
    if bool(from_video) != bool(from_guess):
        logger.debug("One item doesn't have %s (%s -> %s). Returning False", key, from_guess, from_video)
        return False

    # Both sides carry it: compare the actual values.
    if from_video:
        return _has_match(video, guess, key)

    logger.debug("Both video and guess don't have %s. Returning True", key)
    return True

View File

@ -0,0 +1,10 @@
from bazarr.app import config
def test_get_settings():
    """``config.get_settings()`` must return a dict instance."""
    loaded = config.get_settings()
    assert isinstance(loaded, dict)
def test_get_scores():
    """``config.get_scores()`` must expose a dict for both video types."""
    for video_type in ("movie", "episode"):
        assert isinstance(config.get_scores()[video_type], dict)

View File

@ -0,0 +1,36 @@
from subliminal_patch import score
from subliminal_patch.providers.karagarga import KaragargaSubtitle
# def __call__(self, matches, subtitle, video, hearing_impaired=None):
def test_compute_score_set_var(movies, languages):
    """Smoke test: scoring a hash-only match set against a movie must not raise."""
    karagarga_sub = KaragargaSubtitle(languages["en"], "", "", "")
    score.compute_score({"hash"}, karagarga_sub, movies["dune"])
def test_compute_score_set_var_w_episode(episodes, languages):
    """Smoke test: scoring a hash-only match set against an episode must not raise."""
    karagarga_sub = KaragargaSubtitle(languages["en"], "", "", "")
    score.compute_score({"hash"}, karagarga_sub, episodes["breaking_bad_s01e01"])
def test_compute_score_defaults():
    """A ``ComputeScore`` built without arguments uses ``DEFAULT_SCORES``."""
    scorer = score.ComputeScore()
    assert scorer._scores == score.DEFAULT_SCORES
def test_compute_score_custom_invalid():
    """Custom scores that fail the hash-sum check are replaced by the defaults."""
    bad_scores = {"movie": {"hash": 120}, "episode": {"hash": 321}}
    scorer = score.ComputeScore(bad_scores)
    assert scorer._scores == score.DEFAULT_SCORES
def test_compute_score_custom_valid():
    """Custom scores that pass the hash-sum check are used as given.

    The movie table is edited so that its non-hash sum is unchanged
    (release_group 13 -> 12, source 7 -> 8), which keeps it valid.
    """
    import copy

    # ``dict.copy()`` is shallow: the nested "movie"/"episode" tables would be
    # shared with ``score.DEFAULT_SCORES`` and the edits below would silently
    # change the module defaults for every test that runs afterwards.
    defaults_before = copy.deepcopy(score.DEFAULT_SCORES)
    scores_copy = copy.deepcopy(score.DEFAULT_SCORES)
    scores_copy["movie"]["release_group"] = 12
    scores_copy["movie"]["source"] = 8

    scores_ = score.ComputeScore(scores_copy)

    assert scores_._scores["movie"]["release_group"] == 12
    assert scores_._scores["movie"]["source"] == 8
    # The module-level defaults must be left untouched.
    assert score.DEFAULT_SCORES == defaults_before

View File

@ -0,0 +1,29 @@
from subliminal_patch import subtitle
def test_guess_matches_w_edition_only_video(movies):
    """No "edition" match when only the video carries an edition."""
    video = movies["dune"]
    video.edition = "Director's Cut"
    found = subtitle.guess_matches(video, {})
    assert "edition" not in found
def test_guess_matches_w_edition_only_guess(movies):
    """No "edition" match when only the guess carries an edition."""
    video = movies["dune"]
    video.edition = None
    guess = {"edition": "Director's Cut"}
    found = subtitle.guess_matches(video, guess)
    assert "edition" not in found
def test_guess_matches_w_edition_both(movies):
    """"edition" matches when video and guess carry the same edition."""
    video = movies["dune"]
    video.edition = "Director's Cut"
    guess = {"edition": "Director's Cut"}
    found = subtitle.guess_matches(video, guess)
    assert "edition" in found
def test_guess_matches_w_edition_both_empty(movies):
    """"edition" matches when neither video nor guess carries an edition."""
    video = movies["dune"]
    video.edition = None
    found = subtitle.guess_matches(video, {})
    assert "edition" in found