2018-09-17 00:27:00 +00:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
episode, season, disc, episode_count, season_count and episode_details properties
"""
import copy
from collections import defaultdict
from rebulk import Rebulk, RemoveMatch, Rule, AppendMatch, RenameMatch
from rebulk.match import Match
from rebulk.remodule import re
from rebulk.utils import is_iterable
2020-05-20 15:29:39 +00:00
from guessit.rules import match_processors
from guessit.rules.common.numeral import parse_numeral, numeral
2018-09-17 00:27:00 +00:00
from .title import TitleFromPosition
2020-05-20 15:29:39 +00:00
from ..common import dash, alt_dash, seps, seps_no_fs
2018-09-17 00:27:00 +00:00
from ..common.formatters import strip
2020-05-20 15:29:39 +00:00
from ..common.pattern import is_disabled
from ..common.validators import seps_surround, int_coercable, and_
2018-09-17 00:27:00 +00:00
from ...reutils import build_or_pattern
2020-05-20 15:29:39 +00:00
def episodes(config):
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
# pylint: disable=too-many-branches,too-many-statements,too-many-locals
2020-05-20 15:29:39 +00:00
def is_season_episode_disabled(context):
    """
    Tell whether the combined season/episode patterns must be skipped.

    :param context: context object forwarded to ``is_disabled``
    :return: truthy when either the ``episode`` or the ``season`` property is disabled
    """
    return is_disabled(context, 'episode') or is_disabled(context, 'season')
2018-09-17 00:27:00 +00:00
def episodes_season_chain_breaker(matches):
    """
    Break a chain when its two most recent values are too far apart.

    The last two ``episode`` values are compared with ``episode_max_range`` and the last two
    ``season`` values with ``season_max_range``; both limits are read from the enclosing scope.

    :param matches: matches collected so far for the chain
    :return: True if the chain must be broken, False otherwise
    :rtype: bool
    """
    for name, max_range in (('episode', episode_max_range), ('season', season_max_range)):
        found = matches.named(name)
        if len(found) > 1 and abs(found[-1].value - found[-2].value) > max_range:
            return True
    return False
def season_episode_conflict_solver(match, other):
    """
    Conflict solver for episode/season patterns.

    :param match: match being evaluated
    :param other: match conflicting with ``match``
    :return: ``match``, one of the two matches, or the string ``'__default__'``
        (NOTE(review): presumably the returned match is the one rebulk discards and
        ``'__default__'`` defers to rebulk's default solver; confirm against rebulk docs)
    """
    if match.name != other.name:
        if match.name == 'episode' and other.name == 'year':
            return match
        if match.name in ('season', 'episode'):
            if other.name in ('video_codec', 'audio_codec', 'container', 'date'):
                return match
            if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                    and not match.initiator.children.named(match.name + 'Marker')) or (
                        other.name == 'screen_size' and not int_coercable(other.raw)):
                return match
            if other.name in ('season', 'episode') and match.initiator != other.initiator:
                # Two weak patterns facing each other: no preference here.
                if (match.initiator.name in ('weak_episode', 'weak_duplicate')
                        and other.initiator.name in ('weak_episode', 'weak_duplicate')):
                    return '__default__'
                # Otherwise pick the first of (match, other) that is weak or written with an 'x'.
                for current in (match, other):
                    if 'weak-episode' in current.tags or 'x' in current.initiator.raw.lower():
                        return current
    return '__default__'
def ordering_validator(match):
    """
    Validator for season/episode lists. They should be in natural order to be validated.

    episode/season separated by a weak discrete separator should be consecutive, unless a strong
    discrete separator or a range separator is present in the chain
    (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid).

    :param match: parent match whose children are inspected
    :return: True if the ordering is acceptable, False otherwise
    """
    values = match.children.to_dict()

    for property_name in ('season', 'episode'):
        # Numbers must be in natural (ascending) order to be validated.
        if property_name in values and is_iterable(values[property_name]):
            if sorted(values[property_name]) != values[property_name]:
                return False

    def is_consecutive(property_name):
        """
        Check if the property season or episode has valid consecutive values.

        :param property_name: ``'season'`` or ``'episode'``
        :return: True if the values are acceptable, False otherwise
        """
        previous_match = None
        valid = True
        for current_match in match.children.named(property_name):
            if previous_match:
                separator = match.children.previous(current_match,
                                                    lambda m: m.name == property_name + 'Separator', 0)
                if separator:
                    if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                        # Weak separator: the step must be positive and at most max_range_gap + 1.
                        if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
                            valid = False
                    if separator.raw in strong_discrete_separators:
                        # A strong separator validates the whole chain; stop so that a later
                        # weak gap cannot flip the verdict back.
                        valid = True
                        break
            previous_match = current_match
        return valid

    return is_consecutive('episode') and is_consecutive('season')
2020-05-20 15:29:39 +00:00
def validate_roman(match):
    """
    Validate a numeral match.

    A match whose raw text is int-coercable is always accepted; anything else is accepted
    only when ``seps_surround`` accepts it.

    :param match: match to validate
    :return: True, or the result of ``seps_surround(match)``
    """
    if int_coercable(match.raw):
        return True
    return seps_surround(match)
# Word lists and markers read straight from the rule configuration.
season_words = config['season_words']
episode_words = config['episode_words']
of_words = config['of_words']
all_words = config['all_words']
season_markers = config['season_markers']
season_ep_markers = config['season_ep_markers']
disc_markers = config['disc_markers']
episode_markers = config['episode_markers']

# Separators: every entry of seps_no_fs that is not a range separator counts as a weak
# discrete separator; configured discrete separators are the strong ones.
range_separators = config['range_separators']
weak_discrete_separators = [sep for sep in seps_no_fs if sep not in range_separators]
strong_discrete_separators = config['discrete_separators']
discrete_separators = strong_discrete_separators + weak_discrete_separators

# Numeric limits used by the chain breaker and the ordering validator.
episode_max_range = config['episode_max_range']
season_max_range = config['season_max_range']
max_range_gap = config['max_range_gap']
rebulk = Rebulk() \
.regex_defaults(flags=re.IGNORECASE) \
.string_defaults(ignore_case=True) \
.chain_defaults(chain_breaker=episodes_season_chain_breaker) \
.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
formatter={'season': int, 'episode': int, 'version': int, 'count': int},
2018-09-17 00:27:00 +00:00
# S01E02, 01x02, S01S02S03
2020-05-20 15:29:39 +00:00
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
2018-09-17 00:27:00 +00:00
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
2020-05-20 15:29:39 +00:00
build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)')\
.repeater('+') \
.regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
2018-09-17 00:27:00 +00:00
escape=True) +
2020-05-20 15:29:39 +00:00
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
2018-09-17 00:27:00 +00:00
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
2020-05-20 15:29:39 +00:00
r'@?(?P<episode>\d+)').repeater('+') \
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
2018-09-17 00:27:00 +00:00
.regex(r'(?P<season>\d+)@?' +
build_or_pattern(season_ep_markers, name='episodeMarker') +
2020-05-20 15:29:39 +00:00
r'@?(?P<episode>\d+)') \
2018-09-17 00:27:00 +00:00
.regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
escape=True) +
2020-05-20 15:29:39 +00:00
validator={'__parent__': and_(seps_surround, ordering_validator)},
disabled=is_season_episode_disabled) \
.defaults(tags=['SxxExx']) \
.regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)') \
.regex('(?P<other>Extras)', name='other', value='Extras', tags=['no-release-group-prefix']).repeater('?') \
2018-09-17 00:27:00 +00:00
.regex(build_or_pattern(season_markers + discrete_separators + range_separators,
escape=True) +
# episode_details property
2020-05-20 15:29:39 +00:00
for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
disabled=lambda context: is_disabled(context, 'episode_details'))
2018-09-17 00:27:00 +00:00
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
2020-05-20 15:29:39 +00:00
validator={'__parent__': and_(seps_surround, ordering_validator)},
2018-09-17 00:27:00 +00:00
2020-05-20 15:29:39 +00:00
2018-09-17 00:27:00 +00:00
formatter={'season': parse_numeral, 'count': parse_numeral},
2020-05-20 15:29:39 +00:00
validator={'__parent__': and_(seps_surround, ordering_validator),
2018-09-17 00:27:00 +00:00
'season': validate_roman,
2020-05-20 15:29:39 +00:00
'count': validate_roman},
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
.defaults(formatter={'season': parse_numeral, 'count': parse_numeral},
validator={'season': validate_roman, 'count': validate_roman},
conflict_solver=season_episode_conflict_solver) \
2018-09-17 00:27:00 +00:00
.regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
.regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
.regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
name='seasonSeparator', escape=True) +
2020-05-20 15:29:39 +00:00
2018-09-17 00:27:00 +00:00
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
2020-05-20 15:29:39 +00:00
disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))
2018-09-17 00:27:00 +00:00
rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
r'(?:v(?P<version>\d+))?' +
r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?', # Episode 4
validator={'episode': validate_roman},
2020-05-20 15:29:39 +00:00
formatter={'episode': parse_numeral},
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))
2018-09-17 00:27:00 +00:00
rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
2020-05-20 15:29:39 +00:00
formatter={'other': lambda match: 'Complete'},
disabled=lambda context: is_disabled(context, 'season'))
2018-09-17 00:27:00 +00:00
# 12, 13
2020-05-20 15:29:39 +00:00
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None, tags=['weak-episode']) \
2018-09-17 00:27:00 +00:00
.regex(r'(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
2020-05-20 15:29:39 +00:00
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})', abbreviations=None).repeater('*')
2018-09-17 00:27:00 +00:00
# 012, 013
2020-05-20 15:29:39 +00:00
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None, tags=['weak-episode']) \
2018-09-17 00:27:00 +00:00
.regex(r'0(?P<episode>\d{1,2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
2020-05-20 15:29:39 +00:00
.regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
2018-09-17 00:27:00 +00:00
# 112, 113
2020-05-20 15:29:39 +00:00
disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
.defaults(validator=None, tags=['weak-episode'], name='weak_episode') \
2018-09-17 00:27:00 +00:00
.regex(r'(?P<episode>\d{3,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
2020-05-20 15:29:39 +00:00
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})', abbreviations=None).repeater('*')
2018-09-17 00:27:00 +00:00
# 1, 2, 3
2020-05-20 15:29:39 +00:00
disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
.defaults(validator=None, tags=['weak-episode']) \
2018-09-17 00:27:00 +00:00
.regex(r'(?P<episode>\d)') \
.regex(r'v(?P<version>\d+)').repeater('?') \
2020-05-20 15:29:39 +00:00
.regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})', abbreviations=None).repeater('*')
2018-09-17 00:27:00 +00:00
2020-05-20 15:29:39 +00:00
# e112, e113, 1e18, 3e19
rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
2018-09-17 00:27:00 +00:00
.defaults(validator=None) \
2020-05-20 15:29:39 +00:00
.regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
2018-09-17 00:27:00 +00:00
.regex(r'v(?P<version>\d+)').repeater('?') \
2020-05-20 15:29:39 +00:00
.regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
2018-09-17 00:27:00 +00:00
# ep 112, ep113, ep112, ep113
2020-05-20 15:29:39 +00:00
rebulk.chain(disabled=lambda context: is_disabled(context, 'episode')) \
2018-09-17 00:27:00 +00:00
.defaults(validator=None) \
.regex(r'ep-?(?P<episode>\d{1,4})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
2020-05-20 15:29:39 +00:00
.regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})', abbreviations=None).repeater('*')
2018-09-17 00:27:00 +00:00
# cap 112, cap 112_114
2020-05-20 15:29:39 +00:00
disabled=is_season_episode_disabled) \
.defaults(validator=None, tags=['see-pattern']) \
2018-09-17 00:27:00 +00:00
.regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
# 102, 0102
2020-05-20 15:29:39 +00:00
rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
2018-09-17 00:27:00 +00:00
disabled=lambda context: (context.get('episode_prefer_number', False) or
2020-05-20 15:29:39 +00:00
context.get('type') == 'movie') or is_season_episode_disabled(context)) \
.defaults(tags=['weak-episode', 'weak-duplicate'],
conflict_solver=season_episode_conflict_solver) \
2018-09-17 00:27:00 +00:00
.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
.regex(r'v(?P<version>\d+)').repeater('?') \
2020-05-20 15:29:39 +00:00
.regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})', abbreviations=None).repeater('*')
2018-09-17 00:27:00 +00:00
2020-05-20 15:29:39 +00:00
disabled=lambda context: is_disabled(context, 'version'))
2018-09-17 00:27:00 +00:00
rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])
# TODO: List of words
# detached of X count (season/episode)
rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
2020-05-20 15:29:39 +00:00
disabled=lambda context: is_disabled(context, 'episode'))
disabled=lambda context: is_disabled(context, 'episode_format'))
rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
SeePatternRange(range_separators + ['_']),
SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak(episode_words),
RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)
2018-09-17 00:27:00 +00:00
2020-05-20 15:29:39 +00:00
return rebulk
2018-09-17 00:27:00 +00:00
2020-05-20 15:29:39 +00:00
class WeakConflictSolver(Rule):
# NOTE(review): in this view the class has lost its indentation, its docstring delimiters and
# apparently several statements. Gaps are flagged below instead of being guessed.
Rule to decide whether weak-episode or weak-duplicate matches should be kept.
If an anime is detected:
- weak-duplicate matches should be removed
- weak-episode matches should be tagged as anime
- weak-episode matches are removed unless they're part of an episode range match.
priority = 128
consequence = [RemoveMatch, AppendMatch]
def enabled(self, context):
# Active for every context type except 'movie'.
return context.get('type') != 'movie'
# NOTE(review): is_anime takes `cls` but no @classmethod decorator is visible; it is called as
# self.is_anime(matches) in when() — confirm the decorator exists in the real file.
def is_anime(cls, matches):
"""Return True if it seems to be an anime.
Anime characteristics:
- version, crc32 matches
- screen_size inside brackets
- release_group at start and inside brackets
if matches.named('version') or matches.named('crc32'):
return True
for group in matches.markers.named('group'):
# A screen_size match inside the group marker counts as an anime hint.
if matches.range(group.start, group.end, predicate=lambda m: m.name == 'screen_size'):
return True
# Group starting where a path marker starts and whose first hole equals the whole group text.
if matches.markers.starting(group.start, predicate=lambda m: m.name == 'path'):
hole = matches.holes(group.start, group.end, index=0)
if hole and hole.raw == group.raw:
return True
return False
def when(self, matches, context):
to_remove = []
to_append = []
anime_detected = self.is_anime(matches)
for filepart in matches.markers.named('path'):
# Matches of this path part produced by the 'weak_episode' / 'weak_duplicate' patterns.
weak_matches = matches.range(filepart.start, filepart.end, predicate=(
lambda m: m.initiator.name == 'weak_episode'))
weak_dup_matches = matches.range(filepart.start, filepart.end, predicate=(
lambda m: m.initiator.name == 'weak_duplicate'))
if anime_detected:
if weak_matches:
for match in matches.range(filepart.start, filepart.end, predicate=(
lambda m: m.name == 'episode' and m.initiator.name != 'weak_duplicate')):
# Copy of the episode match with the extra 'anime' tag.
episode = copy.copy(match)
episode.tags = episode.tags + ['anime']
# NOTE(review): nothing visible stores `episode` in to_append or fills to_remove in this
# branch — statements appear to be missing.
elif weak_dup_matches:
episodes_in_range = matches.range(filepart.start, filepart.end, predicate=(
lambda m:
m.name == 'episode' and m.initiator.name == 'weak_episode'
and m.initiator.children.named('episodeSeparator')
# NOTE(review): the closing parentheses of the call above are not visible.
if not episodes_in_range and not matches.range(filepart.start, filepart.end,
predicate=lambda m: 'SxxExx' in m.tags):
# NOTE(review): the body of this condition is not visible.
for match in episodes_in_range:
# Copy of the ranged episode match with its tags cleared.
episode = copy.copy(match)
episode.tags = []
# NOTE(review): the body of the `if to_append:` below is not visible.
if to_append:
# Standard rule result: the two lists when anything was collected, False otherwise.
if to_remove or to_append:
return to_remove, to_append
return False
2018-09-17 00:27:00 +00:00
class CountValidator(Rule):
    """
    Validate count property and rename it.

    Each ``count`` match is attached to the closest preceding ``episode`` or ``season`` match and
    renamed to ``episode_count`` or ``season_count`` accordingly.
    """
    priority = 64
    consequence = [RemoveMatch, RenameMatch('episode_count'), RenameMatch('season_count')]

    properties = {'episode_count': [None], 'season_count': [None]}

    def when(self, matches, context):
        """
        Sort ``count`` matches into the lists consumed by the three consequences.

        :return: ``(to_remove, episode_count, season_count)`` or False when all three are empty
        """
        to_remove = []
        episode_count = []
        season_count = []

        for count in matches.named('count'):
            previous = matches.previous(count, lambda match: match.name in ['episode', 'season'], 0)
            if previous:
                if previous.name == 'episode':
                    episode_count.append(count)
                elif previous.name == 'season':
                    season_count.append(count)
            else:
                # NOTE(review): the branch bodies were absent from the reviewed text and are
                # restored from the returned lists; dropping an orphan count is the only use
                # left for to_remove — confirm against version control.
                to_remove.append(count)

        if to_remove or episode_count or season_count:
            return to_remove, episode_count, season_count
        return False
2018-09-17 00:27:00 +00:00
class SeePatternRange(Rule):
# NOTE(review): in this view the class has lost its indentation, docstring delimiters and
# apparently some statements; bare timestamp lines are extraction residue. Gaps are flagged only.
Create matches for episode range for SEE pattern. E.g.: Cap.102_104
priority = 128
consequence = [RemoveMatch, AppendMatch]
def __init__(self, range_separators):
2021-03-22 14:26:26 +00:00
2018-09-17 00:27:00 +00:00
# Separator values for which when() generates a range of episodes.
self.range_separators = range_separators
def when(self, matches, context):
to_remove = []
to_append = []
# Walk every 'episodeSeparator' match tagged 'see-pattern'.
for separator in matches.tagged('see-pattern', lambda m: m.name == 'episodeSeparator'):
previous_match = matches.previous(separator, lambda m: m.name == 'episode' and 'see-pattern' in m.tags, 0)
next_match = matches.next(separator, lambda m: m.name == 'season' and 'see-pattern' in m.tags, 0)
# NOTE(review): the body of this guard is not visible; as written, the following line would
# hand a falsy next_match to matches.next() — confirm.
if not next_match:
next_match = matches.next(next_match, lambda m: m.name == 'episode' and 'see-pattern' in m.tags, 0)
if previous_match and next_match and separator.value in self.range_separators:
# One copy of next_match per value from previous_match.value + 1 up to next_match.value included.
for episode_number in range(previous_match.value + 1, next_match.value + 1):
match = copy.copy(next_match)
match.value = episode_number
# NOTE(review): nothing visible adds `match` to to_append or fills to_remove — statements appear
# to be missing.
2020-05-20 15:29:39 +00:00
if to_remove or to_append:
return to_remove, to_append
return False
2018-09-17 00:27:00 +00:00
class AbstractSeparatorRange(Rule):
# NOTE(review): in this view the class has lost its indentation, docstring delimiters and
# apparently some statements; bare timestamp lines are extraction residue. Gaps are flagged only.
Remove separator matches and create matches for season range.
priority = 128
consequence = [RemoveMatch, AppendMatch]
def __init__(self, range_separators, property_name):
2021-03-22 14:26:26 +00:00
2018-09-17 00:27:00 +00:00
# range_separators: separator values that denote a range.
# property_name: property handled by the subclass; '<property_name>Separator' matches are looked up in when().
self.range_separators = range_separators
self.property_name = property_name
def when(self, matches, context):
to_remove = []
to_append = []
# First pass: explicit '<property_name>Separator' matches.
for separator in matches.named(self.property_name + 'Separator'):
2020-05-20 15:29:39 +00:00
previous_match = matches.previous(separator, lambda m: m.name == self.property_name, 0)
next_match = matches.next(separator, lambda m: m.name == self.property_name, 0)
initiator = separator.initiator
2018-09-17 00:27:00 +00:00
if previous_match and next_match and separator.value in self.range_separators:
# Copies of next_match for the values strictly between the two bounds.
for episode_number in range(previous_match.value + 1, next_match.value):
match = copy.copy(next_match)
match.value = episode_number
# NOTE(review): `match` and `initiator` are not used by any visible statement and nothing
# visible fills to_remove here — statements appear to be missing.
2020-05-20 15:29:39 +00:00
2018-09-17 00:27:00 +00:00
# Second pass: consecutive property matches whose in-between input text is a range separator.
previous_match = None
for next_match in matches.named(self.property_name):
if previous_match:
separator = matches.input_string[previous_match.initiator.end:next_match.initiator.start]
# Retry the comparison on the stripped text when the raw text is not a range separator.
if separator not in self.range_separators:
separator = strip(separator)
if separator in self.range_separators:
2020-05-20 15:29:39 +00:00
initiator = previous_match.initiator
2018-09-17 00:27:00 +00:00
for episode_number in range(previous_match.value + 1, next_match.value):
match = copy.copy(next_match)
match.value = episode_number
# NOTE(review): as in the first pass, `match` is not stored by any visible statement.
2020-05-20 15:29:39 +00:00
2018-09-17 00:27:00 +00:00
# NOTE(review): the Match(...) call below is not closed in the visible lines; remaining
# arguments appear to be missing.
to_append.append(Match(previous_match.end, next_match.start - 1,
name=self.property_name + 'Separator',
to_remove.append(next_match) # Remove and append match to support proper ordering
previous_match = next_match
2020-05-20 15:29:39 +00:00
if to_remove or to_append:
return to_remove, to_append
return False
class RenameToAbsoluteEpisode(Rule):
# NOTE(review): in this view the class has lost its indentation, docstring delimiters and
# apparently some statements. Gaps are flagged below instead of being guessed.
Rename episode to absolute_episodes.
Absolute episodes are only used if two groups of episodes are detected:
S02E04-06 25-27
25-27 S02E04-06
2x04-06 25-27
28. Anime Name S02E05
The matches in the group with higher episode values are renamed to absolute_episode.
consequence = RenameMatch('absolute_episode')
def when(self, matches, context): # pylint:disable=inconsistent-return-statements
# Initiators (parent matches) that own more than one 'episode' child.
initiators = {match.initiator for match in matches.named('episode')
if len(match.initiator.children.named('episode')) > 1}
if len(initiators) != 2:
ret = []
for filepart in matches.markers.named('path'):
# NOTE(review): the next line is the tail of a call whose opening is not visible, so the body
# of this condition cannot be read from here.
if matches.range(filepart.start + 1, filepart.end, predicate=lambda m: m.name == 'episode'):
matches.starting(filepart.start, predicate=lambda m: m.initiator.name == 'weak_episode'))
return ret
# Order the two initiators by end position.
initiators = sorted(initiators, key=lambda item: item.end)
# Proceed only when no hole between the two groups has content left after stripping `seps`.
if not matches.holes(initiators[0].end, initiators[1].start, predicate=lambda m: m.raw.strip(seps)):
first_range = matches.named('episode', predicate=lambda m: m.initiator == initiators[0])
second_range = matches.named('episode', predicate=lambda m: m.initiator == initiators[1])
# Same number of episodes on both sides: return the group whose first value is higher.
if len(first_range) == len(second_range):
if second_range[0].value > first_range[0].value:
return second_range
if first_range[0].value > second_range[0].value:
return first_range
2018-09-17 00:27:00 +00:00
class EpisodeNumberSeparatorRange(AbstractSeparatorRange):
    """
    Remove separator matches and create matches for episode range.
    """

    def __init__(self, range_separators):
        """
        :param range_separators: separator values that denote a range
        """
        super().__init__(range_separators, "episode")
2018-09-17 00:27:00 +00:00
class SeasonSeparatorRange(AbstractSeparatorRange):
    """
    Remove separator matches and create matches for season range.
    """

    def __init__(self, range_separators):
        """
        :param range_separators: separator values that denote a range
        """
        super().__init__(range_separators, "season")
2018-09-17 00:27:00 +00:00
class RemoveWeakIfMovie(Rule):
# NOTE(review): in this view the class has lost its indentation, docstring delimiters and
# apparently some statements; bare timestamp lines are extraction residue. Gaps are flagged only.
2020-05-20 15:29:39 +00:00
Remove weak-episode tagged matches if it seems to be a movie.
2018-09-17 00:27:00 +00:00
priority = 64
consequence = RemoveMatch
def enabled(self, context):
# Active for every context type except 'episode'.
return context.get('type') != 'episode'
def when(self, matches, context):
to_remove = []
to_ignore = set()
remove = False
for filepart in matches.markers.named('path'):
# First 'year' match of the path part; finding one switches removal on.
year = matches.range(filepart.start, filepart.end, predicate=lambda m: m.name == 'year', index=0)
if year:
remove = True
2020-05-20 15:29:39 +00:00
# First private match located after the year in the same path part.
next_match = matches.range(year.end, filepart.end, predicate=lambda m: m.private, index=0)
# NOTE(review): the body of the condition below is not visible.
if (next_match and not matches.holes(year.end, next_match.start, predicate=lambda m: m.raw.strip(seps))
and not matches.at_match(next_match, predicate=lambda m: m.name == 'year')):
2018-09-17 00:27:00 +00:00
2020-05-20 15:29:39 +00:00
# Matches owning more than one 'episode' child are never removed.
to_ignore.update(matches.range(filepart.start, filepart.end,
predicate=lambda m: len(m.children.named('episode')) > 1))
2018-09-17 00:27:00 +00:00
if remove:
2020-05-20 15:29:39 +00:00
# Drop 'weak-episode' matches unless their initiator is ignored or they carry the 'anime' tag.
to_remove.extend(matches.tagged('weak-episode', predicate=(
lambda m: m.initiator not in to_ignore and 'anime' not in m.tags)))
2018-09-17 00:27:00 +00:00
return to_remove
2020-05-20 15:29:39 +00:00
class RemoveWeak(Rule):
# NOTE(review): in this view the class has lost its indentation, docstring delimiters and
# apparently some statements; bare timestamp lines are extraction residue. Gaps are flagged only.
Remove weak-episode matches which appears after video, source, and audio matches.
priority = 16
consequence = RemoveMatch, AppendMatch
def __init__(self, episode_words):
2021-03-22 14:26:26 +00:00
2020-05-20 15:29:39 +00:00
# Words compared with the lower-cased raw text of the preceding match in when().
self.episode_words = episode_words
def when(self, matches, context):
to_remove = []
to_append = []
for filepart in matches.markers.named('path'):
weaks = matches.range(filepart.start, filepart.end, predicate=lambda m: 'weak-episode' in m.tags)
if weaks:
# Only the first weak match of the path part is examined.
weak = weaks[0]
previous = matches.previous(weak, predicate=lambda m: m.name in (
'audio_codec', 'screen_size', 'streaming_service', 'source', 'video_profile',
'audio_channels', 'audio_profile'), index=0)
# The preceding technical match must be adjacent: no hole with content left after stripping `seps`.
if previous and not matches.holes(
previous.end, weak.start, predicate=lambda m: m.raw.strip(seps)):
if previous.raw.lower() in self.episode_words:
# NOTE(review): an `except ValueError:` follows with no visible `try:`; the statements below
# presumably sit inside that try block — confirm.
# Builds an 'episode' match from the weak one, starting where `previous` starts.
episode = copy.copy(weak)
episode.name = 'episode'
episode.value = int(weak.value)
episode.start = previous.start
episode.private = False
episode.tags = []
# NOTE(review): nothing visible stores `episode` in to_append or fills to_remove, and the
# except body is not visible.
except ValueError:
if to_remove or to_append:
return to_remove, to_append
return False
2018-09-17 00:27:00 +00:00
class RemoveWeakIfSxxExx(Rule):
    """
    Remove weak-episode tagged matches if SxxExx pattern is matched.

    Weak episodes at beginning of filepart are kept.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        Collect the weak-episode matches to drop in each path part holding a public SxxExx match.

        :return: list of matches to remove
        """
        to_remove = []
        for filepart in matches.markers.named('path'):
            if matches.range(filepart.start, filepart.end,
                             predicate=lambda m: not m.private and 'SxxExx' in m.tags):
                for match in matches.range(filepart.start, filepart.end,
                                           predicate=lambda m: 'weak-episode' in m.tags):
                    # Keep only a 'weak_episode' pattern match sitting at the very start of the part.
                    if match.start != filepart.start or match.initiator.name != 'weak_episode':
                        # NOTE(review): this append was absent from the reviewed text and is
                        # restored from the returned list — confirm against version control.
                        to_remove.append(match)
        return to_remove
class RemoveInvalidSeason(Rule):
# NOTE(review): in this view the class has lost its indentation, docstring delimiters and
# apparently some statements. Gaps are flagged below instead of being guessed.
Remove invalid season matches.
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
to_remove = []
for filepart in matches.markers.named('path'):
# First public 'season' match tagged 'SxxExx' in this path part.
strong_season = matches.range(filepart.start, filepart.end, index=0,
predicate=lambda m: m.name == 'season'
and not m.private and 'SxxExx' in m.tags)
if strong_season:
# Only acts when the strong season's initiator also owns an 'episode' child.
if strong_season.initiator.children.named('episode'):
# Public seasons located after the strong one.
for season in matches.range(strong_season.end, filepart.end,
predicate=lambda m: m.name == 'season' and not m.private):
# remove weak season or seasons without episode matches
if 'SxxExx' not in season.tags or not season.initiator.children.named('episode'):
# NOTE(review): the body of `if season.initiator:` (and any else branch) is not visible, so
# nothing visible ever fills to_remove.
if season.initiator:
return to_remove
class RemoveInvalidEpisode(Rule):
# NOTE(review): in this view the class has lost its indentation, docstring delimiters and
# apparently some statements. Gaps are flagged below instead of being guessed.
Remove invalid episode matches.
priority = 64
consequence = RemoveMatch
def when(self, matches, context):
to_remove = []
for filepart in matches.markers.named('path'):
# First public 'episode' match tagged 'SxxExx' in this path part.
strong_episode = matches.range(filepart.start, filepart.end, index=0,
predicate=lambda m: m.name == 'episode'
and not m.private and 'SxxExx' in m.tags)
if strong_episode:
strong_ep_marker = RemoveInvalidEpisode.get_episode_prefix(matches, strong_episode)
# Public episodes located after the strong one.
for episode in matches.range(strong_episode.end, filepart.end,
predicate=lambda m: m.name == 'episode' and not m.private):
ep_marker = RemoveInvalidEpisode.get_episode_prefix(matches, episode)
# Both prefixes exist and differ, compared case-insensitively.
if strong_ep_marker and ep_marker and strong_ep_marker.value.lower() != ep_marker.value.lower():
# NOTE(review): the body of `if episode.initiator:` (and any else branch) is not visible, so
# nothing visible ever fills to_remove.
if episode.initiator:
return to_remove
# NOTE(review): get_episode_prefix has no `self` and is called on the class; no @staticmethod
# decorator is visible — confirm it exists in the real file.
def get_episode_prefix(matches, episode):
Return episode prefix: episodeMarker or episodeSeparator
return matches.previous(episode, index=0,
predicate=lambda m: m.name in ('episodeMarker', 'episodeSeparator'))
class RemoveWeakDuplicate(Rule):
    """
    Remove weak-duplicate tagged matches if duplicate patterns, for example The 100.109
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        Walk the weak-duplicate matches of each path part from last to first and collect those
        whose pattern was already seen for the same match name.

        :return: list of matches to remove
        """
        to_remove = []
        for filepart in matches.markers.named('path'):
            patterns = defaultdict(list)
            for match in reversed(matches.range(filepart.start, filepart.end,
                                                predicate=lambda m: 'weak-duplicate' in m.tags)):
                # NOTE(review): both branch bodies were absent from the reviewed text; they are
                # restored from the membership test on `patterns` and the returned list —
                # confirm against version control.
                if match.pattern in patterns[match.name]:
                    to_remove.append(match)
                else:
                    patterns[match.name].append(match.pattern)
        return to_remove
class EpisodeDetailValidator(Rule):
    """
    Validate episode_details if they are detached or next to season or episode.
    """
    priority = 64
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        Collect ``episode_details`` matches that are neither accepted by ``seps_surround`` nor
        preceded or followed by a ``season``/``episode`` match.

        :return: list of matches to remove
        """
        ret = []
        for detail in matches.named('episode_details'):
            if not seps_surround(detail) \
                    and not matches.previous(detail, lambda match: match.name in ['season', 'episode']) \
                    and not matches.next(detail, lambda match: match.name in ['season', 'episode']):
                # NOTE(review): this append was absent from the reviewed text and is restored
                # from the returned list — confirm against version control.
                ret.append(detail)
        return ret
class RemoveDetachedEpisodeNumber(Rule):
    """
    If multiple episode are found, remove those that are not detached from a range and less than 10.

    Fairy Tail 2 - 16-20, 2 should be removed.
    """
    priority = 64
    consequence = RemoveMatch
    dependency = [RemoveWeakIfSxxExx, RemoveWeakDuplicate]

    def when(self, matches, context):
        """
        When the lowest distinct weak episode value is below 10 and the next value does not
        follow it directly, collect that match together with its chain of parents.

        :return: list of matches to remove
        """
        ret = []

        episode_numbers = []
        episode_values = set()
        for match in matches.named('episode', lambda m: not m.private and 'weak-episode' in m.tags):
            # NOTE(review): the statements recording the match and its value were absent from
            # the reviewed text; restored from how both collections are used below — confirm.
            if match.value not in episode_values:
                episode_numbers.append(match)
                episode_values.add(match.value)

        episode_numbers = sorted(episode_numbers, key=lambda m: m.value)
        if len(episode_numbers) > 1 and \
                episode_numbers[0].value < 10 and \
                episode_numbers[1].value - episode_numbers[0].value != 1:
            parent = episode_numbers[0]
            while parent:  # TODO: Add a feature in rebulk to avoid this ...
                # NOTE(review): append restored from the returned list — confirm.
                ret.append(parent)
                parent = parent.parent
        return ret
class VersionValidator(Rule):
    """
    Validate version if previous match is episode or if surrounded by separators.
    """
    priority = 64
    dependency = [RemoveWeakIfMovie, RemoveWeakIfSxxExx]
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        Collect ``version`` matches that have no preceding ``episode`` match and whose initiator
        is not accepted by ``seps_surround``.

        :return: list of matches to remove
        """
        ret = []
        for version in matches.named('version'):
            episode_number = matches.previous(version, lambda match: match.name == 'episode', 0)
            if not episode_number and not seps_surround(version.initiator):
                # NOTE(review): this append was absent from the reviewed text and is restored
                # from the returned list — confirm against version control.
                ret.append(version)
        return ret
class EpisodeSingleDigitValidator(Rule):
    """
    Remove single digit episode when inside a group that doesn't own title.
    """
    dependency = [TitleFromPosition]
    consequence = RemoveMatch

    def when(self, matches, context):
        """
        Collect ``episode`` matches whose initiator has length 1 and that sit inside a ``group``
        marker containing no ``title`` match.

        :return: list of matches to remove
        """
        ret = []
        for episode in matches.named('episode', lambda match: len(match.initiator) == 1):
            group = matches.markers.at_match(episode, lambda marker: marker.name == 'group', index=0)
            if group:
                if not matches.range(*group.span, predicate=lambda match: match.name == 'title'):
                    # NOTE(review): this append was absent from the reviewed text and is
                    # restored from the returned list — confirm against version control.
                    ret.append(episode)
        return ret
2020-05-20 15:29:39 +00:00
class RenameToDiscMatch(Rule):
# NOTE(review): in this view the class has lost its indentation, docstring delimiters and
# apparently some statements. Gaps are flagged below instead of being guessed.
Rename episodes detected with `d` episodeMarkers to `disc`.
consequence = [RenameMatch('disc'), RenameMatch('discMarker'), RemoveMatch]
def when(self, matches, context):
# One list per consequence, in the same order as `consequence`.
discs = []
markers = []
to_remove = []
disc_disabled = is_disabled(context, 'disc')
# Every 'episodeMarker' whose value is 'd' (case-insensitive).
for marker in matches.named('episodeMarker', predicate=lambda m: m.value.lower() == 'd'):
# NOTE(review): the body of `if disc_disabled:` is not visible, and nothing visible fills
# `markers` or `to_remove`.
if disc_disabled:
# Episode children of the marker's initiator, ordered by value.
discs.extend(sorted(marker.initiator.children.named('episode'), key=lambda m: m.value))
if discs or markers or to_remove:
return discs, markers, to_remove
return False