bazarr/libs/guessit/rules/processors.py

239 lines
7.6 KiB
Python
Raw Normal View History

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Processors
"""
from collections import defaultdict
import copy
import six
from rebulk import Rebulk, Rule, CustomRule, POST_PROCESS, PRE_PROCESS, AppendMatch, RemoveMatch
from .common import seps_no_groups
from .common.formatters import cleanup
from .common.comparators import marker_sorted
from .common.date import valid_year
from .common.words import iter_words
class EnlargeGroupMatches(CustomRule):
"""
Enlarge matches that are starting and/or ending group to include brackets in their span.
"""
priority = PRE_PROCESS
def when(self, matches, context):
starting = []
ending = []
for group in matches.markers.named('group'):
for match in matches.starting(group.start + 1):
starting.append(match)
for match in matches.ending(group.end - 1):
ending.append(match)
if starting or ending:
return starting, ending
def then(self, matches, when_response, context):
starting, ending = when_response
for match in starting:
matches.remove(match)
match.start -= 1
match.raw_start += 1
matches.append(match)
for match in ending:
matches.remove(match)
match.end += 1
match.raw_end -= 1
matches.append(match)
class EquivalentHoles(Rule):
"""
Creates equivalent matches for holes that have same values than existing (case insensitive)
"""
priority = POST_PROCESS
consequence = AppendMatch
def when(self, matches, context):
new_matches = []
for filepath in marker_sorted(matches.markers.named('path'), matches):
holes = matches.holes(start=filepath.start, end=filepath.end, formatter=cleanup)
for name in matches.names:
for hole in list(holes):
for current_match in matches.named(name):
if isinstance(current_match.value, six.string_types) and \
hole.value.lower() == current_match.value.lower():
if 'equivalent-ignore' in current_match.tags:
continue
new_value = _preferred_string(hole.value, current_match.value)
if hole.value != new_value:
hole.value = new_value
if current_match.value != new_value:
current_match.value = new_value
hole.name = name
hole.tags = ['equivalent']
new_matches.append(hole)
if hole in holes:
holes.remove(hole)
return new_matches
class RemoveAmbiguous(Rule):
"""
If multiple matches are found with same name and different values, keep the one in the most valuable filepart.
Also keep others match with same name and values than those kept ones.
"""
priority = POST_PROCESS
consequence = RemoveMatch
def __init__(self, sort_function=marker_sorted, predicate=None):
super(RemoveAmbiguous, self).__init__()
self.sort_function = sort_function
self.predicate = predicate
def when(self, matches, context):
fileparts = self.sort_function(matches.markers.named('path'), matches)
previous_fileparts_names = set()
values = defaultdict(list)
to_remove = []
for filepart in fileparts:
filepart_matches = matches.range(filepart.start, filepart.end, predicate=self.predicate)
filepart_names = set()
for match in filepart_matches:
filepart_names.add(match.name)
if match.name in previous_fileparts_names:
if match.value not in values[match.name]:
to_remove.append(match)
else:
if match.value not in values[match.name]:
values[match.name].append(match.value)
previous_fileparts_names.update(filepart_names)
return to_remove
class RemoveLessSpecificSeasonEpisode(RemoveAmbiguous):
"""
If multiple season/episodes matches are found with different values,
keep the one tagged as 'SxxExx' or in the rightmost filepart.
"""
def __init__(self, name):
super(RemoveLessSpecificSeasonEpisode, self).__init__(
sort_function=(lambda markers, matches:
marker_sorted(list(reversed(markers)), matches,
lambda match: match.name == name and 'SxxExx' in match.tags)),
predicate=lambda match: match.name == name)
def _preferred_string(value1, value2): # pylint:disable=too-many-return-statements
"""
Retrieves preferred title from both values.
:param value1:
:type value1: str
:param value2:
:type value2: str
:return: The preferred title
:rtype: str
"""
if value1 == value2:
return value1
if value1.istitle() and not value2.istitle():
return value1
if not value1.isupper() and value2.isupper():
return value1
if not value1.isupper() and value1[0].isupper() and not value2[0].isupper():
return value1
if _count_title_words(value1) > _count_title_words(value2):
return value1
return value2
def _count_title_words(value):
"""
Count only many words are titles in value.
:param value:
:type value:
:return:
:rtype:
"""
ret = 0
for word in iter_words(value):
if word.value.istitle():
ret += 1
return ret
class SeasonYear(Rule):
"""
If a season is a valid year and no year was found, create an match with year.
"""
priority = POST_PROCESS
consequence = AppendMatch
def when(self, matches, context):
ret = []
if not matches.named('year'):
for season in matches.named('season'):
if valid_year(season.value):
year = copy.copy(season)
year.name = 'year'
ret.append(year)
return ret
class Processors(CustomRule):
"""
Empty rule for ordering post_processing properly.
"""
priority = POST_PROCESS
def when(self, matches, context):
pass
def then(self, matches, when_response, context): # pragma: no cover
pass
class StripSeparators(CustomRule):
"""
Strip separators from matches. Keep separators if they are from acronyms, like in ".S.H.I.E.L.D."
"""
priority = POST_PROCESS
def when(self, matches, context):
return matches
def then(self, matches, when_response, context): # pragma: no cover
for match in matches:
for _ in range(0, len(match.span)):
if match.raw[0] in seps_no_groups and (len(match.raw) < 3 or match.raw[2] not in seps_no_groups):
match.raw_start += 1
for _ in reversed(range(0, len(match.span))):
if match.raw[-1] in seps_no_groups and (len(match.raw) < 3 or match.raw[-3] not in seps_no_groups):
match.raw_end -= 1
def processors():
"""
Builder for rebulk object.
:return: Created Rebulk object
:rtype: Rebulk
"""
return Rebulk().rules(EnlargeGroupMatches, EquivalentHoles,
RemoveLessSpecificSeasonEpisode('season'),
RemoveLessSpecificSeasonEpisode('episode'),
RemoveAmbiguous, SeasonYear, Processors, StripSeparators)