Upgraded guessit to the latest version and included its new dependencies.

This commit is contained in:
morpheus65535 2022-01-05 20:12:46 -05:00
parent 166c2a745a
commit f55492a25c
83 changed files with 4382 additions and 900 deletions

View File

@ -56,7 +56,7 @@ def guess_filename(filename, options):
print(yline)
i += 1
else:
print('GuessIt found:', json.dumps(guess, cls=GuessitEncoder, ensure_ascii=False))
print('GuessIt found:', json.dumps(guess, cls=GuessitEncoder, indent=4, ensure_ascii=False))
def display_properties(options):
@ -85,10 +85,10 @@ def display_properties(options):
properties_list = list(sorted(properties.keys()))
for property_name in properties_list:
property_values = properties.get(property_name)
print(2 * ' ' + '[+] %s' % (property_name,))
print(2 * ' ' + f'[+] {property_name}')
if property_values and options.get('values'):
for property_value in property_values:
print(4 * ' ' + '[!] %s' % (property_value,))
print(4 * ' ' + f'[!] {property_value}')
def main(args=None): # pylint:disable=too-many-branches
@ -136,11 +136,8 @@ def main(args=None): # pylint:disable=too-many-branches
for filename in options.get('filename'):
filenames.append(filename)
if options.get('input_file'):
input_file = open(options.get('input_file'), 'r', encoding='utf-8')
try:
with open(options.get('input_file'), 'r', encoding='utf-8') as input_file:
filenames.extend([line.strip() for line in input_file.readlines()])
finally:
input_file.close()
filenames = list(filter(lambda f: f, filenames))

View File

@ -4,4 +4,4 @@
Version module
"""
# pragma: no cover
__version__ = '3.3.1'
__version__ = '3.4.3'

View File

@ -4,11 +4,11 @@
API functions that can be used by external software
"""
from collections import OrderedDict
from pathlib import Path
import os
import traceback
from collections import OrderedDict
from copy import deepcopy
from pathlib import Path
from rebulk.introspector import introspect
@ -25,22 +25,21 @@ class GuessitException(Exception):
def __init__(self, string, options):
super().__init__("An internal error has occured in guessit.\n"
"===================== Guessit Exception Report =====================\n"
"version=%s\n"
"string=%s\n"
"options=%s\n"
f"version={__version__}\n"
f"string={str(string)}\n"
f"options={str(options)}\n"
"--------------------------------------------------------------------\n"
"%s"
f"{traceback.format_exc()}"
"--------------------------------------------------------------------\n"
"Please report at "
"https://github.com/guessit-io/guessit/issues.\n"
"====================================================================" %
(__version__, str(string), str(options), traceback.format_exc()))
"====================================================================")
self.string = string
self.options = options
def configure(options=None, rules_builder=rebulk_builder, force=False):
def configure(options=None, rules_builder=None, force=False):
"""
Load configuration files and initialize rebulk rules if required.
@ -55,6 +54,13 @@ def configure(options=None, rules_builder=rebulk_builder, force=False):
default_api.configure(options, rules_builder=rules_builder, force=force)
def reset():
"""
Reset api internal state.
"""
default_api.reset()
def guessit(string, options=None):
"""
Retrieves all matches from string as a dict
@ -104,6 +110,12 @@ class GuessItApi(object):
self.load_config_options = None
self.advanced_config = None
def reset(self):
"""
Reset api internal state.
"""
self.__init__()
@classmethod
def _fix_encoding(cls, value):
if isinstance(value, list):
@ -121,7 +133,7 @@ class GuessItApi(object):
return False
return True
def configure(self, options=None, rules_builder=rebulk_builder, force=False, sanitize_options=True):
def configure(self, options=None, rules_builder=None, force=False, sanitize_options=True):
"""
Load configuration files and initialize rebulk rules if required.
@ -131,9 +143,14 @@ class GuessItApi(object):
:type rules_builder:
:param force:
:type force: bool
:param sanitize_options:
:type force: bool
:return:
:rtype: dict
"""
if not rules_builder:
rules_builder = rebulk_builder
if sanitize_options:
options = parse_options(options, True)
options = self._fix_encoding(options)
@ -154,7 +171,7 @@ class GuessItApi(object):
self.advanced_config != advanced_config
if should_build_rebulk:
self.advanced_config = advanced_config
self.advanced_config = deepcopy(advanced_config)
self.rebulk = rules_builder(advanced_config)
self.config = config

View File

@ -0,0 +1,152 @@
"""
Config module.
"""
from importlib import import_module
from typing import Any, List
from rebulk import Rebulk
_regex_prefix = 're:'
_import_prefix = 'import:'
_import_cache = {}
_eval_prefix = 'eval:'
_eval_cache = {}
_pattern_types = ('regex', 'string')
_default_module_names = {
'validator': 'guessit.rules.common.validators',
'formatter': 'guessit.rules.common.formatters'
}
def _process_option(name: str, value: Any):
if name in ('validator', 'conflict_solver', 'formatter'):
if isinstance(value, dict):
return {item_key: _process_option(name, item_value) for item_key, item_value in value.items()}
if value is not None:
return _process_option_executable(value, _default_module_names.get(name))
return value
def _import(value: str, default_module_name=None):
    """Resolve a ``module:attr.path`` spec to the referenced object, with caching.

    NOTE(review): the ``'.' in value`` test gates a split on ``':'`` — a dotted
    value without a ``':'`` separator would raise ValueError here. Config specs
    appear to always carry ``':'`` when a module is named; confirm upstream.
    """
    if '.' in value:
        module_name, target = value.rsplit(':', 1)
    else:
        module_name, target = default_module_name, value

    cache_key = module_name + ":" + target
    try:
        # Fast path: already resolved once.
        return _import_cache[cache_key]
    except KeyError:
        pass

    resolved = import_module(module_name)
    for attribute in target.split("."):
        resolved = getattr(resolved, attribute)
    _import_cache[cache_key] = resolved
    return resolved
def _eval(value: str):
    """Compile and evaluate a Python expression string, caching the code object.

    :param value: expression source (e.g. ``"lambda match: 0 < match.value"``).
    :return: the evaluated result (usually a callable for config specs).

    Config values are trusted input; this must never see untrusted data.
    """
    compiled = _eval_cache.get(value)
    if compiled is None:
        compiled = compile(value, '<string>', 'eval')
        # Fix: the original never stored the compiled object, so the cache
        # was inert and every call recompiled the expression.
        _eval_cache[value] = compiled
    return eval(compiled)  # pylint:disable=eval-used
def _process_option_executable(value: str, default_module_name=None):
    """Turn an executable option spec into a value or callable.

    Supports ``import:`` and ``eval:`` prefixes plus bare lambda expressions;
    anything else is returned untouched.
    """
    if value.startswith(_import_prefix):
        return _import(value[len(_import_prefix):], default_module_name)
    if value.startswith(_eval_prefix):
        return _eval(value[len(_eval_prefix):])
    if value.startswith(('lambda ', 'lambda:')):
        return _eval(value)
    return value
def _process_callable_entry(callable_spec: str, rebulk: Rebulk, entry: dict):
    """Resolve *callable_spec* and invoke it with the builder and entry kwargs."""
    register = _process_option_executable(callable_spec)
    register(rebulk, **entry)
def _build_entry_decl(entry, options, value):
entry_decl = dict(options.get(None, {}))
if not value.startswith('_'):
entry_decl['value'] = value
if isinstance(entry, str):
if entry.startswith(_regex_prefix):
entry_decl["regex"] = [entry[len(_regex_prefix):]]
else:
entry_decl["string"] = [entry]
else:
entry_decl.update(entry)
if "pattern" in entry_decl:
legacy_pattern = entry.pop("pattern")
if legacy_pattern.startswith(_regex_prefix):
entry_decl["regex"] = [legacy_pattern[len(_regex_prefix):]]
else:
entry_decl["string"] = [legacy_pattern]
return entry_decl
def load_patterns(rebulk: "Rebulk",
                  pattern_type: str,
                  patterns: List[str],
                  options: dict = None):
    """
    Load patterns for a prepared config entry.

    :param rebulk: Rebulk builder to use.
    :param pattern_type: Pattern type ('regex' or 'string'); names the rebulk
        method invoked to register the patterns.
    :param patterns: Patterns to register.
    :param options: kwargs options to pass to the rebulk pattern function.
        The ``None`` key holds defaults; the *pattern_type* key holds
        type-specific overrides.
    :return:
    """
    # Fix: only the first access was guarded — with the default options=None,
    # options.get(pattern_type) below raised AttributeError.
    if options is None:
        options = {}
    default_options = options.get(None)
    item_options = dict(default_options) if default_options else {}
    pattern_type_option = options.get(pattern_type)
    if pattern_type_option:
        item_options.update(pattern_type_option)
    # Resolve executable option specs (validators, formatters, ...) last.
    item_options = {name: _process_option(name, value) for name, value in item_options.items()}
    getattr(rebulk, pattern_type)(*patterns, **item_options)
def load_config_patterns(rebulk: Rebulk,
                         config: dict,
                         options: dict = None):
    """
    Load patterns defined in given config.

    Each config key is a match value whose entries describe rebulk patterns.
    An entry holding a "callable" key delegates registration to an imported
    function instead of declaring patterns directly.

    :param rebulk: Rebulk builder to use.
    :param config: dict containing pattern definition.
    :param options: Additional pattern options to use.
    :type options: Dict[Dict[str, str]] A dict where key is the pattern type (regex, string, functional) and value is
    the default kwargs options to pass.
    :return:
    """
    if options is None:
        options = {}

    for value, raw_entries in config.items():
        # A key may map to a single entry or to a list of entries.
        entries = raw_entries if isinstance(raw_entries, list) else [raw_entries]
        for entry in entries:
            if isinstance(entry, dict) and "callable" in entry.keys():
                # Delegated registration: the resolved callable receives the
                # builder plus the remaining entry keys as kwargs.
                _process_callable_entry(entry.pop("callable"), rebulk, entry)
                continue
            entry_decl = _build_entry_decl(entry, options, value)

            for pattern_type in _pattern_types:
                patterns = entry_decl.get(pattern_type)
                if not patterns:
                    continue
                if not isinstance(patterns, list):
                    patterns = [patterns]
                # Strip pattern-type keys from the declaration copy so they
                # are not forwarded twice, then pass the remainder as the
                # default (None-keyed) options for this registration.
                patterns_entry_decl = dict(entry_decl)
                for pattern_type_to_remove in _pattern_types:
                    patterns_entry_decl.pop(pattern_type_to_remove, None)
                current_pattern_options = dict(options)
                current_pattern_options[None] = patterns_entry_decl
                load_patterns(rebulk, pattern_type, patterns, current_pattern_options)

View File

@ -1,6 +1,13 @@
{
"expected_title": ["OSS 117", "This is Us"],
"allowed_countries": ["au", "gb", "us"],
"expected_title": [
"OSS 117",
"This is Us"
],
"allowed_countries": [
"au",
"gb",
"us"
],
"allowed_languages": [
"ca",
"cs",
@ -45,21 +52,116 @@
"ending": ")]}"
},
"audio_codec": {
"audio_codec": {
"MP3": {"string": ["MP3", "LAME"],"regex": ["LAME(?:\\d)+-?(?:\\d)+"]},
"MP2": "MP2",
"Dolby Digital": {"string": ["Dolby", "DolbyDigital"], "regex": ["Dolby-Digital", "DD", "AC3D?"]},
"Dolby Atmos": {"string": ["Atmos"], "regex": ["Dolby-?Atmos"]},
"AAC": "AAC",
"Dolby Digital Plus": ["EAC3", "DDP", "DD+"],
"FLAC": "Flac",
"DTS": "DTS",
"DTS-HD": {"regex": ["DTS-?HD", "DTS(?=-?MA)"], "conflict_solver": "lambda match, other: other if other.name == 'audio_codec' else '__default__'"},
"Dolby TrueHD": {"regex": ["True-?HD"] },
"Opus": "Opus",
"Vorbis": "Vorbis",
"PCM": "PCM",
"LPCM": "LPCM"
},
"audio_channels": {
"1.0": ["1ch", "mono"],
"2.0": ["2ch", "stereo", "re:(2[\\W_]0(?:ch)?)(?=[^\\d]|$)"],
"1.0": [
"1ch",
"mono",
"re:(1[\\W_]0(?:ch)?)(?=[^\\d]|$)"
],
"2.0": [
"2ch",
"stereo",
{"regex": "(2[\\W_]0(?:ch)?)(?=[^\\d]|$)", "children": true},
{"string": "20", "validator": "import:seps_after", "tags": "weak-audio_channels"}
],
"5.1": [
"5ch",
"6ch",
"re:(5[\\W_][01](?:ch)?)(?=[^\\d]|$)",
"re:(6[\\W_]0(?:ch)?)(?=[^\\d]|$)"
{"regex": "(5[\\W_][01](?:ch)?)(?=[^\\d]|$)", "children": true},
{"regex": "(6[\\W_]0(?:ch)?)(?=[^\\d]|$)", "children": true},
{"regex": "5[01]", "validator": "import:seps_after", "tags": "weak-audio_channels"}
],
"7.1": ["7ch", "8ch", "re:(7[\\W_][01](?:ch)?)(?=[^\\d]|$)"]
"7.1": [
"7ch",
"8ch",
{"regex": "(7[\\W_][01](?:ch)?)(?=[^\\d]|$)", "children": true},
{"regex": "7[01]", "validator": "import:seps_after", "tags": "weak-audio_channels"}
]
},
"audio_profile": {
"Master Audio": {"string": "MA", "tags": ["audio_profile.rule", "DTS-HD"]},
"High Resolution Audio": {"string": ["HR", "HRA"], "tags": ["audio_profile.rule", "DTS-HD"]},
"Extended Surround": {"string": "ES", "tags": ["audio_profile.rule", "DTS"]},
"High Efficiency": {"string": "HE", "tags": ["audio_profile.rule", "AAC"]},
"Low Complexity": {"string": "LC", "tags": ["audio_profile.rule", "AAC"]},
"High Quality": {"string": "HQ", "tags": ["audio_profile.rule", "Dolby Digital"]},
"EX": {"string": "EX", "tags": ["audio_profile.rule", "Dolby Digital"]}
}
},
"bit_rate": {
"bit_rate": {
"_": {
"regex": ["\\d+-?[kmg]b(ps|its?)", "\\d+\\.\\d+-?[kmg]b(ps|its?)"],
"conflict_solver": "lambda match, other: match if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags else other",
"formatter": "import:guessit.rules.common.quantity:BitRate.fromstring",
"tags": ["release-group-prefix"]
}
}
},
"bonus": {
"bonus": {
"_": {
"regex": "x(\\d+)",
"private_parent": true,
"children": true,
"formatter": "eval:int",
"validator": {"__parent__": "import:seps_surround"},
"validate_all": true,
"conflict_solver": "lambda match, conflicting: match if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags else '__default__'"
}
}
},
"cd": {
"_cd_of_cd_count": {
"regex": "cd-?(?P<cd>\\d+)(?:-?of-?(?P<cd_count>\\d+))?",
"validator": {
"cd": "lambda match: 0 < match.value < 100",
"cd_count": "lambda match: 0 < match.value < 100"
},
"formatter": {"cd": "eval:int", "cd_count": "eval:int"},
"children": true,
"private_parent": true,
"properties": {"cd": [null], "cd_count": [null]}
},
"_cd_count": {
"regex": "(?P<cd_count>\\d+)-?cds?",
"validator": {
"cd": "lambda match: 0 < match.value < 100",
"cd_count": "lambda match: 0 < match.value < 100"
},
"formatter": {"cd_count": "eval:int"},
"children": true,
"private_parent": true,
"properties": {"cd": [null], "cd_count": [null]}
}
},
"container": {
"subtitles": ["srt", "idx", "sub", "ssa", "ass"],
"info": ["nfo"],
"subtitles": [
"srt",
"idx",
"sub",
"ssa",
"ass"
],
"info": [
"nfo"
],
"videos": [
"3g2",
"3gp",
@ -93,28 +195,102 @@
"wma",
"wmv"
],
"torrent": ["torrent"],
"nzb": ["nzb"]
"torrent": [
"torrent"
],
"nzb": [
"nzb"
]
},
"country": {
"synonyms": {
"ES": ["españa"],
"GB": ["UK"],
"BR": ["brazilian", "bra"],
"CA": ["québec", "quebec", "qc"],
"MX": ["Latinoamérica", "latin america"]
"ES": [
"españa"
],
"GB": [
"UK"
],
"BR": [
"brazilian",
"bra"
],
"CA": [
"québec",
"quebec",
"qc"
],
"MX": [
"Latinoamérica",
"latin america"
]
}
},
"edition": {
"edition": {
"Collector": {"string": ["collector"], "regex": ["collector'?s?-edition", "edition-collector"]},
"Special": [
{"regex": ["special-edition", "edition-special"], "conflict_solver": "lambda match, other: other if other.name == 'episode_details' and other.value == 'Special' else '__default__'"},
{"string": "se", "tags": "has-neighbor"}
],
"Director's Definitive Cut": "ddc",
"Criterion": {"string": ["CC", "Criterion"], "regex": ["criterion-edition", "edition-criterion"] },
"Deluxe": {"string": ["deluxe"], "regex": ["deluxe-edition", "edition-deluxe"] },
"Limited": {"string": ["limited"], "regex": ["limited-edition"], "tags": ["has-neighbor", "release-group-prefix"]},
"Theatrical": {"string": ["theatrical"], "regex": ["theatrical-cut", "theatrical-edition"]},
"Director's Cut": {"string": ["DC"], "regex": ["director'?s?-cut", "director'?s?-cut-edition", "edition-director'?s?-cut"]},
"Extended": {"string": ["extended"], "regex": ["extended-?cut", "extended-?version"], "tags": ["has-neighbor", "release-group-prefix"]},
"Alternative Cut": {"regex": ["alternat(e|ive)(?:-?Cut)?"], "tags": ["has-neighbor", "release-group-prefix"]},
"Remastered": [
{"string": "Remastered", "tags": ["has-neighbor", "release-group-prefix"]},
{"regex": "4k-remaster(?:ed)?", "tags": ["release-group-prefix"]}
],
"Restored": [
{"string": "Restored", "tags": ["has-neighbor", "release-group-prefix"]},
{"regex": "4k-restore(?:d)?", "tags": ["release-group-prefix"]}
],
"Uncensored": {"string": "Uncensored", "tags": ["has-neighbor", "release-group-prefix"]},
"Uncut": {"string": "Uncut", "tags": ["has-neighbor", "release-group-prefix"]},
"Unrated": {"string": "Unrated", "tags": ["has-neighbor", "release-group-prefix"]},
"Festival": {"string": "Festival", "tags": ["has-neighbor-before", "has-neighbor-after"]},
"IMAX": {"string": ["imax"], "regex": ["imax-edition"]},
"Fan": {"regex": ["fan-edit(?:ion)?", "fan-collection"]},
"Ultimate": {"regex": ["ultimate-edition"]},
"_Ultimate_Collector": {"regex": ["ultimate-collector'?s?-edition"], "value": ["Ultimate", "Collector"]},
"_Ultimate_Fan": {"regex": ["ultimate-fan-edit(?:ion)?", "ultimate-fan-collection"], "value": ["Ultimate", "Fan"]}
}
},
"episodes": {
"season_max_range": 100,
"episode_max_range": 100,
"max_range_gap": 1,
"season_markers": ["s"],
"season_ep_markers": ["x"],
"disc_markers": ["d"],
"episode_markers": ["xe", "ex", "ep", "e", "x"],
"range_separators": ["-", "~", "to", "a"],
"discrete_separators": ["+", "&", "and", "et"],
"season_markers": [
"s"
],
"season_ep_markers": [
"x"
],
"disc_markers": [
"d"
],
"episode_markers": [
"xe",
"ex",
"ep",
"e",
"x"
],
"range_separators": [
"-",
"~",
"to",
"a"
],
"discrete_separators": [
"+",
"&",
"and",
"et"
],
"season_words": [
"season",
"saison",
@ -137,34 +313,87 @@
"capitulo",
"capitulos"
],
"of_words": ["of", "sur"],
"all_words": ["All"]
"of_words": [
"of",
"sur"
],
"all_words": [
"All"
]
},
"film": {
"film": {
"_f": {"regex": "f(\\d{1,2})", "name": "film", "validate_all": true, "validator": {"__parent__": "import:seps_surround"}, "private_parent": true, "children": true, "formatter": "eval:int"}
}
},
"language": {
"synonyms": {
"ell": ["gr", "greek"],
"spa": ["esp", "español", "espanol"],
"fra": ["français", "vf", "vff", "vfi", "vfq"],
"swe": ["se"],
"por_BR": ["po", "pb", "pob", "ptbr", "br", "brazilian"],
"deu_CH": ["swissgerman", "swiss german"],
"nld_BE": ["flemish"],
"ell": [
"gr",
"greek"
],
"spa": [
"esp",
"español",
"espanol"
],
"fra": [
"français",
"vf",
"vff",
"vfi",
"vfq"
],
"swe": [
"se"
],
"por_BR": [
"po",
"pb",
"pob",
"ptbr",
"br",
"brazilian"
],
"deu_CH": [
"swissgerman",
"swiss german"
],
"nld_BE": [
"flemish"
],
"cat": [
"català",
"castellano",
"espanol castellano",
"español castellano"
],
"ces": ["cz"],
"ukr": ["ua"],
"zho": ["cn"],
"jpn": ["jp"],
"hrv": ["scr"],
"mul": ["multi", "dl"]
"ces": [
"cz"
],
"ukr": [
"ua"
],
"zho": [
"cn"
],
"jpn": [
"jp"
],
"hrv": [
"scr"
],
"mul": [
"multi",
"multiple",
"dl"
]
},
"subtitle_affixes": [
"sub",
"subs",
"subtitle",
"subtitles",
"esub",
"esubs",
"subbed",
@ -187,37 +416,158 @@
"legendas",
"legendado",
"subtitulado",
"soft",
"subtitles"
"soft"
],
"subtitle_suffixes": ["subforced", "fansub", "hardsub"],
"language_affixes": ["dublado", "dubbed", "dub"],
"language_prefixes": ["true"],
"language_suffixes": ["audio"],
"weak_affixes": ["v", "audio", "true"]
"subtitle_suffixes": [
"subforced",
"fansub",
"hardsub"
],
"language_affixes": [
"dublado",
"dubbed",
"dub"
],
"language_prefixes": [
"true"
],
"language_suffixes": [
"audio"
],
"weak_affixes": [
"v",
"audio",
"true"
]
},
"other": {
"other": {
"Audio Fixed": {"regex": ["Audio-?Fix", "Audio-?Fixed"]},
"Sync Fixed": {"regex": ["Sync-?Fix", "Sync-?Fixed"]},
"Dual Audio": {"string": ["Dual"], "regex": ["Dual-?Audio"]},
"Widescreen": {"string": ["ws"], "regex": ["wide-?screen"]},
"Reencoded": {"regex": ["Re-?Enc(?:oded)?"]},
"_repack_with_count": {"regex": ["Repack(?P<proper_count>\\d*)", "Rerip(?P<proper_count>\\d*)"], "value": {"other": "Proper"}, "tags": ["streaming_service.prefix", "streaming_service.suffix"]},
"Proper": [
{"string": "Proper", "tags": ["has-neighbor", "streaming_service.prefix", "streaming_service.suffix"]},
{"regex": ["Real-Proper", "Real-Repack", "Real-Rerip"], "tags": ["streaming_service.prefix", "streaming_service.suffix", "real"]},
{"string": "Real", "tags": ["has-neighbor", "streaming_service.prefix", "streaming_service.suffix", "real"]}
],
"Fix": [
{"string": ["Fix", "Fixed"], "tags": ["has-neighbor-before", "has-neighbor-after", "streaming_service.prefix", "streaming_service.suffix"]},
{"string": ["Dirfix", "Nfofix", "Prooffix"], "tags": ["streaming_service.prefix", "streaming_service.suffix"]},
{"regex": ["(?:Proof-?)?Sample-?Fix"], "tags": ["streaming_service.prefix", "streaming_service.suffix"]}
],
"Fan Subtitled": {"string": "Fansub", "tags": "has-neighbor"},
"Fast Subtitled": {"string": "Fastsub", "tags": "has-neighbor"},
"Region 5": "R5",
"Region C": "RC",
"Preair": {"regex": "Pre-?Air"},
"PS Vita": [
{"regex": "(?:PS-?)Vita"},
{"string": "Vita", "tags": "has-neighbor"}
],
"_HdRip": {"value": {"other": "HD", "another": "Rip"}, "regex": ["(HD)(?P<another>Rip)"], "private_parent": true, "children": true, "validator":{"__parent__": "import:seps_surround"}, "validate_all": true},
"Screener": [
"Screener",
{"regex": "Scr(?:eener)?", "validator": null, "tags": ["other.validate.screener", "source-prefix", "source-suffix"]}
],
"Remux": "Remux",
"Hybrid": "Hybrid",
"PAL": "PAL",
"SECAM": "SECAM",
"NTSC": "NTSC",
"XXX": "XXX",
"2in1": "2in1",
"3D": {"string": "3D", "tags": "has-neighbor"},
"High Quality": {"string": "HQ", "tags": "uhdbluray-neighbor"},
"High Resolution": "HR",
"Line Dubbed": "LD",
"Mic Dubbed": "MD",
"Micro HD": ["mHD", "HDLight"],
"Low Definition": "LDTV",
"High Frame Rate": "HFR",
"Variable Frame Rate": "VFR",
"HD": {"string": "HD", "validator": null, "tags": ["streaming_service.prefix", "streaming_service.suffix"]},
"Full HD": {"string": ["FHD"],"regex": ["Full-?HD"], "validator": null, "tags": ["streaming_service.prefix", "streaming_service.suffix"]},
"Ultra HD": {"string": ["UHD"],"regex": ["Ultra-?(?:HD)?"], "validator": null, "tags": ["streaming_service.prefix", "streaming_service.suffix"]},
"Upscaled": {"regex": "Upscaled?"},
"Complete": {"string": ["Complet", "Complete"], "tags": ["has-neighbor", "release-group-prefix"]},
"Classic": {"string": "Classic", "tags": ["has-neighbor", "release-group-prefix"]},
"Bonus": {"string": "Bonus", "tags": ["has-neighbor", "release-group-prefix"]},
"Trailer": {"string": "Trailer", "tags": ["has-neighbor", "release-group-prefix"]},
"Retail": {"string": "Retail", "tags": ["has-neighbor", "release-group-prefix"]},
"Colorized": {"string": "Colorized", "tags": ["has-neighbor", "release-group-prefix"]},
"Internal": {"string": "Internal", "tags": ["has-neighbor", "release-group-prefix"]},
"Line Audio": {"string": "LiNE", "tags": ["has-neighbor-before", "has-neighbor-after", "release-group-prefix"]},
"Read NFO": {"regex": "Read-?NFO"},
"Converted": {"string": "CONVERT", "tags": "has-neighbor"},
"Documentary": {"string": ["DOCU", "DOKU"], "tags": "has-neighbor"},
"Open Matte": {"regex": "(?:OM|Open-?Matte)", "tags": "has-neighbor"},
"Straight to Video": {"string": "STV", "tags": "has-neighbor"},
"Original Aspect Ratio": {"string": "OAR", "tags": "has-neighbor"},
"East Coast Feed": {"regex": "(?:Live-)?(?:Episode-)?East-?(?:Coast-)?Feed"},
"West Coast Feed": {"regex": "(?:Live-)?(?:Episode-)?West-?(?:Coast-)?Feed"},
"Original Video": {"string": ["VO", "OV"], "tags": "has-neighbor"},
"Original Animated Video": {"string": ["Ova", "Oav"]},
"Original Net Animation": "Ona",
"Original Animation DVD": "Oad",
"Mux": {"string": "Mux", "validator": "import:seps_after", "tags": ["other.validate.mux", "video-codec-prefix", "source-suffix"]},
"Hardcoded Subtitles": ["HC", "vost"],
"Standard Dynamic Range": {"string": "SDR", "tags": "uhdbluray-neighbor"},
"HDR10": {"regex": "HDR(?:10)?", "tags": "uhdbluray-neighbor"},
"Dolby Vision": {"regex": "Dolby-?Vision", "tags": "uhdbluray-neighbor"},
"BT.2020": {"regex": "BT-?2020","tags": "uhdbluray-neighbor"},
"Sample": {"string": "Sample", "tags": ["at-end", "not-a-release-group"]},
"Extras": [
{"string": "Extras", "tags": "has-neighbor"},
{"regex": "Digital-?Extras?"}
],
"Proof": {"string": "Proof", "tags": ["at-end", "not-a-release-group"]},
"Obfuscated": {"string": ["Obfuscated", "Scrambled"], "tags": ["at-end", "not-a-release-group"]},
"Repost": {"string": ["xpost", "postbot", "asrequested"], "tags": "not-a-release-group"},
"_complete_words": {"callable": "import:guessit.rules.properties.other:complete_words", "season_words": ["seasons?", "series?"], "complete_article_words": ["The"]}
}
},
"part": {
"prefixes": ["pt", "part"]
"prefixes": [
"pt",
"part"
]
},
"release_group": {
"forbidden_names": ["bonus", "by", "for", "par", "pour", "rip"],
"forbidden_names": [
"bonus",
"by",
"for",
"par",
"pour",
"rip"
],
"ignored_seps": "[]{}()"
},
"screen_size": {
"frame_rates": [
"23.976",
"24",
"25",
"29.970",
"30",
"48",
"50",
"60",
"120"
"23\\.976",
"24(?:\\.0{1,3})?",
"25(?:\\.0{1,3})?",
"29\\.970",
"30(?:\\.0{1,3})?",
"48(?:\\.0{1,3})?",
"50(?:\\.0{1,3})?",
"60(?:\\.0{1,3})?",
"120(?:\\.0{1,3})?"
],
"min_ar": 1.333,
"max_ar": 1.898,
"interlaced": ["360", "480", "576", "900", "1080"],
"interlaced": [
"360",
"480",
"540",
"576",
"900",
"1080"
],
"progressive": [
"360",
"480",
@ -232,40 +582,90 @@
"4320"
]
},
"source": {
"rip_prefix": "(?P<other>Rip)-?",
"rip_suffix": "-?(?P<other>Rip)"
},
"website": {
"safe_tlds": ["com", "net", "org"],
"safe_subdomains": ["www"],
"safe_prefixes": ["co", "com", "net", "org"],
"prefixes": ["from"]
"safe_tlds": [
"com",
"net",
"org"
],
"safe_subdomains": [
"www"
],
"safe_prefixes": [
"co",
"com",
"net",
"org"
],
"prefixes": [
"from"
]
},
"streaming_service": {
"A&E": ["AE", "A&E"],
"9Now": "9NOW",
"A&E": [
"AE",
"A&E"
],
"ABC": "AMBC",
"ABC Australia": "AUBC",
"Al Jazeera English": "AJAZ",
"AMC": "AMC",
"Amazon Prime": ["AMZN", "Amazon", "re:Amazon-?Prime"],
"Adult Swim": ["AS", "re:Adult-?Swim"],
"Amazon Prime": [
"AMZN",
"Amazon",
"re:Amazon-?Prime"
],
"Adult Swim": [
"AS",
"re:Adult-?Swim"
],
"America's Test Kitchen": "ATK",
"Animal Planet": "ANPL",
"AnimeLab": "ANLB",
"AOL": "AOL",
"AppleTV": ["ATVP", "ATV+"],
"AppleTV": [
"ATVP",
"ATV+",
"APTV"
],
"ARD": "ARD",
"BBC iPlayer": ["iP", "re:BBC-?iPlayer"],
"BBC iPlayer": [
"iP",
"re:BBC-?iPlayer"
],
"Binge": "BNGE",
"Blackpills": "BKPL",
"BluTV": "BLU",
"Boomerang": "BOOM",
"Disney+": "DSNP",
"BravoTV": "BRAV",
"Canal+": "CNLP",
"Cartoon Network": "CN",
"CBC": "CBC",
"CBS": "CBS",
"CNBC": "CNBC",
"Comedy Central": ["CC", "re:Comedy-?Central"],
"Channel 4": "4OD",
"Comedy Central": [
"CC",
"re:Comedy-?Central"
],
"Channel 4": [
"ALL4",
"4OD"
],
"CHRGD": "CHGD",
"Cinemax": "CMAX",
"Country Music Television": "CMT",
"Comedians in Cars Getting Coffee": "CCGC",
"Crunchy Roll": ["CR", "re:Crunchy-?Roll"],
"Crave": "CRAV",
"Crunchy Roll": [
"CR",
"re:Crunchy-?Roll"
],
"Crackle": "CRKL",
"CSpan": "CSPN",
"CTV": "CTV",
@ -274,10 +674,20 @@
"Daisuki": "DSKI",
"DC Universe": "DCU",
"Deadhouse Films": "DHF",
"DramaFever": ["DF", "DramaFever"],
"DramaFever": [
"DF",
"DramaFever"
],
"Digiturk Diledigin Yerde": "DDY",
"Discovery": ["DISC", "Discovery"],
"Disney": ["DSNY", "Disney"],
"Discovery": [
"DISC",
"Discovery"
],
"Discovery Plus": "DSCP",
"Disney": [
"DSNY",
"Disney"
],
"DIY Network": "DIY",
"Doc Club": "DOCC",
"DPlay": "DPLY",
@ -286,51 +696,108 @@
"El Trece": "ETTV",
"ESPN": "ESPN",
"Esquire": "ESQ",
"Facebook Watch": "FBWatch",
"Family": "FAM",
"Family Jr": "FJR",
"Fandor": "FANDOR",
"Food Network": "FOOD",
"Fox": "FOX",
"Fox Premium": "FOXP",
"Foxtel": "FXTL",
"Freeform": "FREE",
"FYI Network": "FYI",
"GagaOOLala": "Gaga",
"Global": "GLBL",
"GloboSat Play": "GLOB",
"Hallmark": "HLMK",
"HBO Go": ["HBO", "re:HBO-?Go"],
"HBO Go": [
"HBO",
"re:HBO-?Go"
],
"HBO Max": "HMAX",
"HGTV": "HGTV",
"History": ["HIST", "History"],
"History": [
"HIST",
"History"
],
"Hulu": "HULU",
"Investigation Discovery": "ID",
"IFC": "IFC",
"iTunes": ["iTunes", { "pattern": "iT", "ignore_case": false }],
"hoichoi": "HoiChoi",
"iflix": "IFX",
"iQIYI": "iQIYI",
"iTunes": [
"iTunes",
{"pattern": "iT", "ignore_case": false}
],
"ITV": "ITV",
"Knowledge Network": "KNOW",
"Lifetime": "LIFE",
"Motor Trend OnDemand": "MTOD",
"MBC": ["MBC", "MBCVOD"],
"MBC": [
"MBC",
"MBCVOD"
],
"MSNBC": "MNBC",
"MTV": "MTV",
"National Geographic": ["NATG", "re:National-?Geographic"],
"NBA TV": ["NBA", "re:NBA-?TV"],
"MUBI": "MUBI",
"National Audiovisual Institute": "INA",
"National Film Board": "NFB",
"National Geographic": [
"NATG",
"re:National-?Geographic"
],
"NBA TV": [
"NBA",
"re:NBA-?TV"
],
"NBC": "NBC",
"Netflix": ["NF", "Netflix"],
"Netflix": [
"NF",
"Netflix"
],
"NFL": "NFL",
"NFL Now": "NFLN",
"NHL GameCenter": "GC",
"Nickelodeon": ["NICK", "Nickelodeon"],
"Nickelodeon": [
"NICK",
"Nickelodeon",
"NICKAPP"
],
"Norsk Rikskringkasting": "NRK",
"OnDemandKorea": ["ODK", "OnDemandKorea"],
"OnDemandKorea": [
"ODK",
"OnDemandKorea"
],
"Opto": "OPTO",
"Oprah Winfrey Network": "OWN",
"PBS": "PBS",
"PBS Kids": "PBSK",
"Peacock": [
"PCOK",
"Peacock"
],
"Playstation Network": "PSN",
"Pluzz": "PLUZ",
"PokerGO": "POGO",
"Rakuten TV": "RKTN",
"The Roku Channel": "ROKU",
"RTE One": "RTE",
"SBS (AU)": "SBS",
"SeeSo": ["SESO", "SeeSo"],
"RUUTU": "RUUTU",
"SBS": "SBS",
"Science Channel": "SCI",
"SeeSo": [
"SESO",
"SeeSo"
],
"Shomi": "SHMI",
"Showtime": "SHO",
"Sony": "SONY",
"Spike": "SPIK",
"Spike TV": ["SPKE", "re:Spike-?TV"],
"Spike TV": [
"SPKE",
"re:Spike-?TV"
],
"Sportsnet": "SNET",
"Sprout": "SPRT",
"Stan": "STAN",
@ -340,14 +807,22 @@
"Syfy": "SYFY",
"TBS": "TBS",
"TFou": "TFOU",
"The CW": ["CW", "re:The-?CW"],
"The CW": [
"CW",
"re:The-?CW"
],
"TLC": "TLC",
"TubiTV": "TUBI",
"TV3 Ireland": "TV3",
"TV4 Sweeden": "TV4",
"TVING": "TVING",
"TV Land": ["TVL", "re:TV-?Land"],
"TV Land": [
"TVL",
"re:TV-?Land"
],
"TVNZ": "TVNZ",
"UFC": "UFC",
"UFC Fight Pass": "FP",
"UKTV": "UKTV",
"Univision": "UNIV",
"USA Network": "USAN",

View File

@ -0,0 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Data
"""

View File

@ -6,11 +6,14 @@ Options
import copy
import json
import os
import pkgutil
import shlex
from argparse import ArgumentParser
try:
from importlib.resources import read_text
except ImportError:
from importlib_resources import read_text
def build_argument_parser():
"""
@ -142,7 +145,7 @@ def load_config(options):
configurations = []
if not options.get('no_default_config'):
default_options_data = pkgutil.get_data('guessit', 'config/options.json').decode('utf-8')
default_options_data = read_text('guessit.config', 'options.json')
default_options = json.loads(default_options_data)
configurations.append(default_options)
@ -176,7 +179,7 @@ def load_config(options):
if 'advanced_config' not in config:
# Guessit doesn't work without advanced_config, so we use default if no configuration files provides it.
default_options_data = pkgutil.get_data('guessit', 'config/options.json').decode('utf-8')
default_options_data = read_text('guessit.config', 'options.json')
default_options = json.loads(default_options_data)
config['advanced_config'] = default_options['advanced_config']
@ -246,17 +249,16 @@ def load_config_file(filepath):
:rtype:
"""
if filepath.endswith('.json'):
with open(filepath) as config_file_data:
with open(filepath, encoding='utf-8') as config_file_data:
return json.load(config_file_data)
if filepath.endswith('.yaml') or filepath.endswith('.yml'):
try:
import yaml # pylint:disable=import-outside-toplevel
with open(filepath) as config_file_data:
with open(filepath, encoding='utf-8') as config_file_data:
return yaml.load(config_file_data, yaml.SafeLoader)
except ImportError as err: # pragma: no cover
raise ConfigurationException('Configuration file extension is not supported. '
'PyYAML should be installed to support "%s" file' % (
filepath,)) from err
f'PyYAML should be installed to support "{filepath}" file') from err
try:
# Try to load input as JSON
@ -264,7 +266,7 @@ def load_config_file(filepath):
except: # pylint: disable=bare-except
pass
raise ConfigurationException('Configuration file extension is not supported for "%s" file.' % (filepath,))
raise ConfigurationException(f'Configuration file extension is not supported for "{filepath}" file.')
def get_options_file_locations(homedir, cwd, yaml_supported=False):

View File

@ -25,11 +25,11 @@ def build_or_pattern(patterns, name=None, escape=False):
if not or_pattern:
or_pattern.append('(?')
if name:
or_pattern.append('P<' + name + '>')
or_pattern.append(f'P<{name}>')
else:
or_pattern.append(':')
else:
or_pattern.append('|')
or_pattern.append('(?:%s)' % re.escape(pattern) if escape else pattern)
or_pattern.append(f'(?:{re.escape(pattern)})' if escape else pattern)
or_pattern.append(')')
return ''.join(or_pattern)

View File

@ -26,7 +26,7 @@ from .properties.other import other
from .properties.size import size
from .properties.bit_rate import bit_rate
from .properties.edition import edition
from .properties.cds import cds
from .properties.cd import cd
from .properties.bonus import bonus
from .properties.film import film
from .properties.part import part
@ -71,7 +71,7 @@ def rebulk_builder(config):
rebulk.rebulk(size(_config('size')))
rebulk.rebulk(bit_rate(_config('bit_rate')))
rebulk.rebulk(edition(_config('edition')))
rebulk.rebulk(cds(_config('cds')))
rebulk.rebulk(cd(_config('cd')))
rebulk.rebulk(bonus(_config('bonus')))
rebulk.rebulk(film(_config('film')))
rebulk.rebulk(part(_config('part')))

View File

@ -13,3 +13,10 @@ title_seps = r'-+/\|' # separators for title
dash = (r'-', r'['+re.escape(seps_no_fs)+']') # abbreviation used by many rebulk objects.
alt_dash = (r'@', r'['+re.escape(seps_no_fs)+']') # abbreviation used by many rebulk objects.
def optional(pattern):
"""
Make a regex pattern optional
"""
return '(?:' + pattern + ')?'

View File

@ -11,13 +11,21 @@ _dsep = r'[-/ \.]'
_dsep_bis = r'[-/ \.x]'
date_regexps = [
# pylint:disable=consider-using-f-string
re.compile(r'%s((\d{8}))%s' % (_dsep, _dsep), re.IGNORECASE),
# pylint:disable=consider-using-f-string
re.compile(r'%s((\d{6}))%s' % (_dsep, _dsep), re.IGNORECASE),
# pylint:disable=consider-using-f-string
re.compile(r'(?:^|[^\d])((\d{2})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE),
# pylint:disable=consider-using-f-string
re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{2}))(?:$|[^\d])' % (_dsep, _dsep), re.IGNORECASE),
# pylint:disable=consider-using-f-string
re.compile(r'(?:^|[^\d])((\d{4})%s(\d{1,2})%s(\d{1,2}))(?:$|[^\d])' % (_dsep_bis, _dsep), re.IGNORECASE),
# pylint:disable=consider-using-f-string
re.compile(r'(?:^|[^\d])((\d{1,2})%s(\d{1,2})%s(\d{4}))(?:$|[^\d])' % (_dsep, _dsep_bis), re.IGNORECASE),
# pylint:disable=consider-using-f-string
re.compile(r'(?:^|[^\d])((\d{1,2}(?:st|nd|rd|th)?%s(?:[a-z]{3,10})%s\d{4}))(?:$|[^\d])' % (_dsep, _dsep),
# pylint:disable=consider-using-f-string
re.IGNORECASE)]

View File

@ -81,7 +81,7 @@ def __parse_roman(value):
:rtype:
"""
if not __romanNumeralPattern.search(value):
raise ValueError('Invalid Roman numeral: %s' % value)
raise ValueError(f'Invalid Roman numeral: {value}')
result = 0
index = 0

View File

@ -59,10 +59,10 @@ class Quantity(object):
return not self == other
def __repr__(self):
return '<{0} [{1}]>'.format(self.__class__.__name__, self)
return f'<{self.__class__.__name__} [{self}]>'
def __str__(self):
return '{0}{1}'.format(self.magnitude, self.units)
return f'{self.magnitude}{self.units}'
class Size(Quantity):

View File

@ -9,6 +9,7 @@ from rebulk.remodule import re
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after
from ...config import load_config_patterns
audio_properties = ['audio_codec', 'audio_profile', 'audio_channels']
@ -22,8 +23,8 @@ def audio_codec(config): # pylint:disable=unused-argument
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk()\
.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])\
rebulk = Rebulk() \
.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]) \
.string_defaults(ignore_case=True)
def audio_codec_priority(match1, match2):
@ -46,46 +47,19 @@ def audio_codec(config): # pylint:disable=unused-argument
conflict_solver=audio_codec_priority,
disabled=lambda context: is_disabled(context, 'audio_codec'))
rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
rebulk.string("MP2", value="MP2")
rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='Dolby Digital')
rebulk.regex('Dolby-?Atmos', 'Atmos', value='Dolby Atmos')
rebulk.string("AAC", value="AAC")
rebulk.string('EAC3', 'DDP', 'DD+', value='Dolby Digital Plus')
rebulk.string("Flac", value="FLAC")
rebulk.string("DTS", value="DTS")
rebulk.regex('DTS-?HD', 'DTS(?=-?MA)', value='DTS-HD',
conflict_solver=lambda match, other: other if other.name == 'audio_codec' else '__default__')
rebulk.regex('True-?HD', value='Dolby TrueHD')
rebulk.string('Opus', value='Opus')
rebulk.string('Vorbis', value='Vorbis')
rebulk.string('PCM', value='PCM')
rebulk.string('LPCM', value='LPCM')
load_config_patterns(rebulk, config.get('audio_codec'))
rebulk.defaults(clear=True,
name='audio_profile',
disabled=lambda context: is_disabled(context, 'audio_profile'))
rebulk.string('MA', value='Master Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('HR', 'HRA', value='High Resolution Audio', tags=['audio_profile.rule', 'DTS-HD'])
rebulk.string('ES', value='Extended Surround', tags=['audio_profile.rule', 'DTS'])
rebulk.string('HE', value='High Efficiency', tags=['audio_profile.rule', 'AAC'])
rebulk.string('LC', value='Low Complexity', tags=['audio_profile.rule', 'AAC'])
rebulk.string('HQ', value='High Quality', tags=['audio_profile.rule', 'Dolby Digital'])
rebulk.string('EX', value='EX', tags=['audio_profile.rule', 'Dolby Digital'])
load_config_patterns(rebulk, config.get('audio_profile'))
rebulk.defaults(clear=True,
name="audio_channels",
disabled=lambda context: is_disabled(context, 'audio_channels'))
rebulk.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
rebulk.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
rebulk.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')
for value, items in config.get('audio_channels').items():
for item in items:
if item.startswith('re:'):
rebulk.regex(item[3:], value=value, children=True)
else:
rebulk.string(item, value=value)
load_config_patterns(rebulk, config.get('audio_channels'))
rebulk.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
AudioChannelsValidatorRule)
@ -139,20 +113,20 @@ class AudioProfileRule(Rule):
def when(self, matches, context):
profile_list = matches.named('audio_profile',
lambda match: 'audio_profile.rule' in match.tags and
self.codec in match.tags)
self.codec in match.tags)
ret = []
for profile in profile_list:
codec = matches.at_span(profile.span,
lambda match: match.name == 'audio_codec' and
match.value == self.codec, 0)
match.value == self.codec, 0)
if not codec:
codec = matches.previous(profile,
lambda match: match.name == 'audio_codec' and
match.value == self.codec)
match.value == self.codec)
if not codec:
codec = matches.next(profile,
lambda match: match.name == 'audio_codec' and
match.value == self.codec)
match.value == self.codec)
if not codec:
ret.append(profile)
if codec:

View File

@ -3,15 +3,14 @@
"""
video_bit_rate and audio_bit_rate properties
"""
from rebulk.remodule import re
from rebulk import Rebulk
from rebulk.remodule import re
from rebulk.rules import Rule, RemoveMatch, RenameMatch
from ..common import dash, seps
from ..common.pattern import is_disabled
from ..common.quantity import BitRate
from ..common.validators import seps_surround
from ...config import load_config_patterns
def bit_rate(config): # pylint:disable=unused-argument
@ -27,13 +26,8 @@ def bit_rate(config): # pylint:disable=unused-argument
and is_disabled(context, 'video_bit_rate')))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name='audio_bit_rate', validator=seps_surround)
rebulk.regex(r'\d+-?[kmg]b(ps|its?)', r'\d+\.\d+-?[kmg]b(ps|its?)',
conflict_solver=(
lambda match, other: match
if other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
else other
),
formatter=BitRate.fromstring, tags=['release-group-prefix'])
load_config_patterns(rebulk, config.get('bit_rate'))
rebulk.rules(BitRateTypeRule)

View File

@ -3,14 +3,13 @@
"""
bonus property
"""
from rebulk.remodule import re
from rebulk import Rebulk, AppendMatch, Rule
from rebulk.remodule import re
from .title import TitleFromPosition
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
from ...config import load_config_patterns
def bonus(config): # pylint:disable=unused-argument
@ -23,14 +22,9 @@ def bonus(config): # pylint:disable=unused-argument
:rtype: Rebulk
"""
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'bonus'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
rebulk = rebulk.regex_defaults(name='bonus', flags=re.IGNORECASE)
rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
validator={'__parent__': seps_surround},
validate_all=True,
conflict_solver=lambda match, conflicting: match
if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags
else '__default__')
load_config_patterns(rebulk, config.get('bonus'))
rebulk.rules(BonusTitleRule)

View File

@ -0,0 +1,28 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
cd and cd_count properties
"""
from rebulk import Rebulk
from rebulk.remodule import re
from ..common import dash
from ..common.pattern import is_disabled
from ...config import load_config_patterns
def cd(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'cd'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
load_config_patterns(rebulk, config)
return rebulk

View File

@ -1,41 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
cd and cd_count properties
"""
from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
def cds(config): # pylint:disable=unused-argument
"""
Builder for rebulk object.
:param config: rule configuration
:type config: dict
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'cd'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.regex(r'cd-?(?P<cd>\d+)(?:-?of-?(?P<cd_count>\d+))?',
validator={'cd': lambda match: 0 < match.value < 100,
'cd_count': lambda match: 0 < match.value < 100},
formatter={'cd': int, 'cd_count': int},
children=True,
private_parent=True,
properties={'cd': [None], 'cd_count': [None]})
rebulk.regex(r'(?P<cd_count>\d+)-?cds?',
validator={'cd': lambda match: 0 < match.value < 100,
'cd_count': lambda match: 0 < match.value < 100},
formatter={'cd_count': int},
children=True,
private_parent=True,
properties={'cd': [None], 'cd_count': [None]})
return rebulk

View File

@ -65,7 +65,7 @@ class GuessitCountryConverter(babelfish.CountryReverseConverter): # pylint: dis
return 'UK'
return str(babelfish.Country(alpha2))
def reverse(self, name): # pylint:disable=arguments-differ
def reverse(self, name): # pylint:disable=arguments-renamed
# exceptions come first, as they need to override a potential match
# with any of the other guessers
try:

View File

@ -3,12 +3,13 @@
"""
edition property
"""
from rebulk import Rebulk
from rebulk.remodule import re
from rebulk import Rebulk
from ..common import dash
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
from ...config import load_config_patterns
def edition(config): # pylint:disable=unused-argument
@ -21,32 +22,9 @@ def edition(config): # pylint:disable=unused-argument
:rtype: Rebulk
"""
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'edition'))
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name='edition', validator=seps_surround)
rebulk.regex('collector', "collector'?s?-edition", 'edition-collector', value='Collector')
rebulk.regex('special-edition', 'edition-special', value='Special',
conflict_solver=lambda match, other: other
if other.name == 'episode_details' and other.value == 'Special'
else '__default__')
rebulk.string('se', value='Special', tags='has-neighbor')
rebulk.string('ddc', value="Director's Definitive Cut")
rebulk.regex('criterion-edition', 'edition-criterion', 'CC', value='Criterion')
rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe')
rebulk.regex('limited', 'limited-edition', value='Limited', tags=['has-neighbor', 'release-group-prefix'])
rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical')
rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
value="Director's Cut")
rebulk.regex('extended', 'extended-?cut', 'extended-?version',
value='Extended', tags=['has-neighbor', 'release-group-prefix'])
rebulk.regex('alternat(e|ive)(?:-?Cut)?', value='Alternative Cut', tags=['has-neighbor', 'release-group-prefix'])
for value in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
rebulk.string('Festival', value='Festival', tags=['has-neighbor-before', 'has-neighbor-after'])
rebulk.regex('imax', 'imax-edition', value='IMAX')
rebulk.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
rebulk.regex('ultimate-edition', value='Ultimate')
rebulk.regex("ultimate-collector'?s?-edition", value=['Ultimate', 'Collector'])
rebulk.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection', value=['Ultimate', 'Fan'])
load_config_patterns(rebulk, config.get('edition'))
return rebulk

View File

@ -6,9 +6,11 @@ film property
from rebulk import Rebulk, AppendMatch, Rule
from rebulk.remodule import re
from ..common import dash
from ..common.formatters import cleanup
from ..common.pattern import is_disabled
from ..common.validators import seps_surround
from ...config import load_config_patterns
def film(config): # pylint:disable=unused-argument
@ -17,10 +19,11 @@ def film(config): # pylint:disable=unused-argument
:return: Created Rebulk object
:rtype: Rebulk
"""
rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, validate_all=True, validator={'__parent__': seps_surround})
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'film'))
rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name='film', validator=seps_surround)
rebulk.regex(r'f(\d{1,2})', name='film', private_parent=True, children=True, formatter=int,
disabled=lambda context: is_disabled(context, 'film'))
load_config_patterns(rebulk, config.get('film'))
rebulk.rules(FilmTitleRule)

View File

@ -13,8 +13,8 @@ from rebulk.remodule import re
from ..common import seps
from ..common.pattern import is_disabled
from ..common.words import iter_words
from ..common.validators import seps_surround
from ..common.words import iter_words
def language(config, common_words):
@ -64,7 +64,8 @@ def language(config, common_words):
SubtitlePrefixLanguageRule,
SubtitleSuffixLanguageRule,
RemoveLanguage,
RemoveInvalidLanguages(common_words))
RemoveInvalidLanguages(common_words),
RemoveUndeterminedLanguages)
babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])
@ -102,7 +103,7 @@ class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=m
def convert(self, alpha3, country=None, script=None):
return str(babelfish.Language(alpha3, country, script))
def reverse(self, name): # pylint:disable=arguments-differ
def reverse(self, name): # pylint:disable=arguments-renamed
name = name.lower()
# exceptions come first, as they need to override a potential match
# with any of the other guessers
@ -165,7 +166,7 @@ class LanguageWord(object):
return LanguageWord(self.start, self.next_word.end, value, self.input_string, self.next_word.next_word)
def __repr__(self):
return '<({start},{end}): {value}'.format(start=self.start, end=self.end, value=self.value)
return f'<({self.start},{self.end}): {self.value}'
def to_rebulk_match(language_match):
@ -226,7 +227,7 @@ class LanguageFinder(object):
key = match.property_name
if match.lang == UNDETERMINED:
undetermined_map[key].add(match)
elif match.lang == 'mul':
elif match.lang == MULTIPLE:
multi_map[key].add(match)
else:
regular_lang_map[key].add(match)
@ -291,7 +292,7 @@ class LanguageFinder(object):
if match:
yield match
def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix): # pylint:disable=inconsistent-return-statements
def find_match_for_word(self, word, fallback_word, affixes, is_affix, strip_affix):
"""
Return the language match for the given word and affixes.
"""
@ -322,6 +323,7 @@ class LanguageFinder(object):
if match:
return match
return None
def find_language_match_for_word(self, word, key='language'): # pylint:disable=inconsistent-return-statements
"""
@ -508,3 +510,22 @@ class RemoveInvalidLanguages(Rule):
to_remove.append(match)
return to_remove
class RemoveUndeterminedLanguages(Rule):
"""Remove "und" language matches when next other language if found."""
consequence = RemoveMatch
priority = 32
def when(self, matches, context):
to_remove = []
for match in matches.range(0, len(matches.input_string),
predicate=lambda m: m.name in ('language', 'subtitle_language')):
if match.value == "und":
previous = matches.previous(match, index=0)
next_ = matches.next(match, index=0)
if previous and previous.name == 'language' or next_ and next_.name == 'language':
to_remove.append(match)
return to_remove

View File

@ -3,15 +3,16 @@
"""
other property
"""
import copy
from rebulk import Rebulk, Rule, RemoveMatch, RenameMatch, POST_PROCESS, AppendMatch
from rebulk.match import Match
from rebulk.remodule import re
from ..common import dash
from ..common import seps
from ..common.pattern import is_disabled
from ..common.validators import seps_after, seps_before, seps_surround, and_
from ...config import load_config_patterns
from ...reutils import build_or_pattern
from ...rules.common.formatters import raw_cleanup
@ -29,34 +30,21 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
rebulk.defaults(name="other", validator=seps_surround)
rebulk.regex('Audio-?Fix', 'Audio-?Fixed', value='Audio Fixed')
rebulk.regex('Sync-?Fix', 'Sync-?Fixed', value='Sync Fixed')
rebulk.regex('Dual', 'Dual-?Audio', value='Dual Audio')
rebulk.regex('ws', 'wide-?screen', value='Widescreen')
rebulk.regex('Re-?Enc(?:oded)?', value='Reencoded')
load_config_patterns(rebulk, config.get('other'))
rebulk.string('Repack', 'Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Proper', value='Proper',
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix'])
rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
ValidateAtEnd, ValidateReal, ProperCountRule)
rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
tags=['streaming_service.prefix', 'streaming_service.suffix', 'real'])
rebulk.regex('Real', value='Proper',
tags=['has-neighbor', 'streaming_service.prefix', 'streaming_service.suffix', 'real'])
return rebulk
rebulk.string('Fix', 'Fixed', value='Fix', tags=['has-neighbor-before', 'has-neighbor-after',
'streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix',
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.string('Fansub', value='Fan Subtitled', tags='has-neighbor')
rebulk.string('Fastsub', value='Fast Subtitled', tags='has-neighbor')
season_words = build_or_pattern(["seasons?", "series?"])
complete_articles = build_or_pattern(["The"])
def complete_words(rebulk: Rebulk, season_words, complete_article_words):
"""
Custom pattern to find complete seasons from words.
"""
season_words_pattern = build_or_pattern(season_words)
complete_article_words_pattern = build_or_pattern(complete_article_words)
def validate_complete(match):
"""
@ -71,82 +59,13 @@ def other(config): # pylint:disable=unused-argument,too-many-statements
return False
return True
rebulk.regex('(?P<completeArticle>' + complete_articles + '-)?' +
'(?P<completeWordsBefore>' + season_words + '-)?' +
'Complete' + '(?P<completeWordsAfter>-' + season_words + ')?',
rebulk.regex('(?P<completeArticle>' + complete_article_words_pattern + '-)?' +
'(?P<completeWordsBefore>' + season_words_pattern + '-)?' +
'Complete' + '(?P<completeWordsAfter>-' + season_words_pattern + ')?',
private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
value={'other': 'Complete'},
tags=['release-group-prefix'],
validator={'__parent__': and_(seps_surround, validate_complete)})
rebulk.string('R5', value='Region 5')
rebulk.string('RC', value='Region C')
rebulk.regex('Pre-?Air', value='Preair')
rebulk.regex('(?:PS-?)Vita', value='PS Vita')
rebulk.regex('Vita', value='PS Vita', tags='has-neighbor')
rebulk.regex('(HD)(?P<another>Rip)', value={'other': 'HD', 'another': 'Rip'},
private_parent=True, children=True, validator={'__parent__': seps_surround}, validate_all=True)
for value in ('Screener', 'Remux', 'Hybrid', 'PAL', 'SECAM', 'NTSC', 'XXX'):
rebulk.string(value, value=value)
rebulk.string('3D', value='3D', tags='has-neighbor')
rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
rebulk.string('HR', value='High Resolution')
rebulk.string('LD', value='Line Dubbed')
rebulk.string('MD', value='Mic Dubbed')
rebulk.string('mHD', 'HDLight', value='Micro HD')
rebulk.string('LDTV', value='Low Definition')
rebulk.string('HFR', value='High Frame Rate')
rebulk.string('VFR', value='Variable Frame Rate')
rebulk.string('HD', value='HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None,
tags=['streaming_service.prefix', 'streaming_service.suffix'])
rebulk.regex('Upscaled?', value='Upscaled')
for value in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail',
'Colorized', 'Internal'):
rebulk.string(value, value=value, tags=['has-neighbor', 'release-group-prefix'])
rebulk.regex('LiNE', value='Line Audio', tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
rebulk.regex('Read-?NFO', value='Read NFO')
rebulk.string('CONVERT', value='Converted', tags='has-neighbor')
rebulk.string('DOCU', 'DOKU', value='Documentary', tags='has-neighbor')
rebulk.string('OM', value='Open Matte', tags='has-neighbor')
rebulk.string('STV', value='Straight to Video', tags='has-neighbor')
rebulk.string('OAR', value='Original Aspect Ratio', tags='has-neighbor')
rebulk.string('Complet', value='Complete', tags=['has-neighbor', 'release-group-prefix'])
for coast in ('East', 'West'):
rebulk.regex(r'(?:Live-)?(?:Episode-)?' + coast + '-?(?:Coast-)?Feed', value=coast + ' Coast Feed')
rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
rebulk.string('Ova', 'Oav', value='Original Animated Video')
rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
rebulk.string('Mux', value='Mux', validator=seps_after,
tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
rebulk.string('HC', 'vost', value='Hardcoded Subtitles')
rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
rebulk.regex('HDR(?:10)?', value='HDR10', tags='uhdbluray-neighbor')
rebulk.regex('Dolby-?Vision', value='Dolby Vision', tags='uhdbluray-neighbor')
rebulk.regex('BT-?2020', value='BT.2020', tags='uhdbluray-neighbor')
rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
rebulk.string('Extras', value='Extras', tags='has-neighbor')
rebulk.regex('Digital-?Extras?', value='Extras')
rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')
rebulk.rules(RenameAnotherToOther, ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
ValidateScreenerRule, ValidateMuxRule, ValidateHardcodedSubs, ValidateStreamingServiceNeighbor,
ValidateAtEnd, ValidateReal, ProperCountRule)
return rebulk
class ProperCountRule(Rule):
@ -165,15 +84,30 @@ class ProperCountRule(Rule):
raws = {} # Count distinct raw values
for proper in propers:
raws[raw_cleanup(proper.raw)] = proper
proper_count_match = copy.copy(propers[-1])
proper_count_match.name = 'proper_count'
value = 0
for raw in raws.values():
value += 2 if 'real' in raw.tags else 1
start = None
end = None
proper_count_matches = []
for proper in raws.values():
if not start or start > proper.start:
start = proper.start
if not end or end < proper.end:
end = proper.end
if proper.children.named('proper_count', 0):
value += int(proper.children.named('proper_count', 0).value)
elif 'real' in proper.tags:
value += 2
else:
value += 1
proper_count_match = Match(name='proper_count', start=start, end=end, input_string=matches.input_string)
proper_count_match.value = value
return proper_count_match
proper_count_matches.append(proper_count_match)
return proper_count_matches
class RenameAnotherToOther(Rule):
@ -360,7 +294,7 @@ class ValidateAtEnd(Rule):
predicate=lambda m: m.name == 'other' and 'at-end' in m.tags):
if (matches.holes(match.end, filepart.end, predicate=lambda m: m.value.strip(seps)) or
matches.range(match.end, filepart.end, predicate=lambda m: m.name not in (
'other', 'container'))):
'other', 'container'))):
to_remove.append(match)
return to_remove

View File

@ -26,7 +26,7 @@ def screen_size(config):
"""
interlaced = frozenset(config['interlaced'])
progressive = frozenset(config['progressive'])
frame_rates = [re.escape(rate) for rate in config['frame_rates']]
frame_rates = frozenset(config['frame_rates'])
min_ar = config['min_ar']
max_ar = config['max_ar']
@ -45,11 +45,12 @@ def screen_size(config):
rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)' + frame_rate_pattern + '?')
rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)?(?:hd)')
rebulk.regex(res_pattern + progressive_pattern + r'(?P<scan_type>p)?x?')
rebulk.string('4k', value='2160p')
rebulk.string('4k', value='2160p',
conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else match)
rebulk.regex(r'(?P<width>\d{3,4})-?(?:x|\*)-?(?P<height>\d{3,4})',
conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else other)
rebulk.regex(frame_rate_pattern + '(p|fps)', name='frame_rate',
rebulk.regex(frame_rate_pattern + '-?(?:p|fps)', name='frame_rate',
formatter=FrameRate.fromstring, disabled=lambda context: is_disabled(context, 'frame_rate'))
rebulk.rules(PostProcessScreenSize(progressive, min_ar, max_ar), ScreenSizeOnlyOne, ResolveScreenSizeConflicts)
@ -89,7 +90,7 @@ class PostProcessScreenSize(Rule):
scan_type = (values.get('scan_type') or 'p').lower()
height = values['height']
if 'width' not in values:
match.value = '{0}{1}'.format(height, scan_type)
match.value = f'{height}{scan_type}'
continue
width = values['width']
@ -102,9 +103,9 @@ class PostProcessScreenSize(Rule):
to_append.append(aspect_ratio)
if height in self.standard_heights and self.min_ar < calculated_ar < self.max_ar:
match.value = '{0}{1}'.format(height, scan_type)
match.value = f'{height}{scan_type}'
else:
match.value = '{0}x{1}'.format(width, height)
match.value = f'{width}x{height}'
return to_append

View File

@ -5,12 +5,11 @@ source property
"""
import copy
from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule
from rebulk.remodule import re
from rebulk import AppendMatch, Rebulk, RemoveMatch, Rule
from .audio_codec import HqConflictRule
from ..common import dash, seps
from ..common import dash, seps, optional
from ..common.pattern import is_disabled
from ..common.validators import seps_before, seps_after, or_
@ -31,78 +30,74 @@ def source(config): # pylint:disable=unused-argument
validate_all=True,
validator={'__parent__': or_(seps_before, seps_after)})
rip_prefix = '(?P<other>Rip)-?'
rip_suffix = '-?(?P<other>Rip)'
rip_optional_suffix = '(?:' + rip_suffix + ')?'
rip_prefix = config['rip_prefix']
rip_suffix = config['rip_suffix']
def build_source_pattern(*patterns, **kwargs):
def build_source_pattern(*patterns, prefix='', suffix=''):
"""Helper pattern to build source pattern."""
prefix_format = kwargs.get('prefix') or ''
suffix_format = kwargs.get('suffix') or ''
string_format = prefix_format + '({0})' + suffix_format
return [string_format.format(pattern) for pattern in patterns]
return [prefix + f'({pattern})' + suffix for pattern in patterns]
def demote_other(match, other): # pylint: disable=unused-argument
"""Default conflict solver with 'other' property."""
return other if other.name == 'other' or other.name == 'release_group' else '__default__'
return other if other.name in ['other', 'release_group'] else '__default__'
rebulk.regex(*build_source_pattern('VHS', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('VHS', suffix=optional(rip_suffix)),
value={'source': 'VHS', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('CAM', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('CAM', suffix=optional(rip_suffix)),
value={'source': 'Camera', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('HD-?CAM', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('HD-?CAM', suffix=optional(rip_suffix)),
value={'source': 'HD Camera', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=rip_optional_suffix),
value={'source': 'Telesync', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=rip_optional_suffix),
# For TS, we remove 'streaming_service.suffix' tag to avoid "Shots" being guessed as Showtime and TS.
rebulk.regex(*build_source_pattern('TELESYNC', 'TS', suffix=optional(rip_suffix)),
value={'source': 'Telesync', 'other': 'Rip'}, tags=['video-codec-prefix'], overrides=["tags"])
rebulk.regex(*build_source_pattern('HD-?TELESYNC', 'HD-?TS', suffix=optional(rip_suffix)),
value={'source': 'HD Telesync', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('WORKPRINT', 'WP'), value='Workprint')
rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('TELECINE', 'TC', suffix=optional(rip_suffix)),
value={'source': 'Telecine', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('HD-?TELECINE', 'HD-?TC', suffix=optional(rip_suffix)),
value={'source': 'HD Telecine', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('PPV', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('PPV', suffix=optional(rip_suffix)),
value={'source': 'Pay-per-view', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('SD-?TV', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('SD-?TV', suffix=optional(rip_suffix)),
value={'source': 'TV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('TV', suffix=rip_suffix), # TV is too common to allow matching
value={'source': 'TV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('TV', 'SD-?TV', prefix=rip_prefix),
value={'source': 'TV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('TV-?(?=Dub)'), value='TV')
rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('DVB', 'PD-?TV', suffix=optional(rip_suffix)),
value={'source': 'Digital TV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('DVD', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('DVD', suffix=optional(rip_suffix)),
value={'source': 'DVD', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('DM', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('DM', suffix=optional(rip_suffix)),
value={'source': 'Digital Master', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('VIDEO-?TS', 'DVD-?R(?:$|(?!E))', # 'DVD-?R(?:$|^E)' => DVD-Real ...
'DVD-?9', 'DVD-?5'), value='DVD')
rebulk.regex(*build_source_pattern('HD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
rebulk.regex(*build_source_pattern('HD-?TV', suffix=optional(rip_suffix)), conflict_solver=demote_other,
value={'source': 'HDTV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('TV-?HD', suffix=rip_suffix), conflict_solver=demote_other,
value={'source': 'HDTV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('TV', suffix='-?(?P<other>Rip-?HD)'), conflict_solver=demote_other,
value={'source': 'HDTV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('VOD', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('VOD', suffix=optional(rip_suffix)),
value={'source': 'Video on Demand', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('WEB', 'WEB-?DL', suffix=rip_suffix),
value={'source': 'Web', 'other': 'Rip'})
# WEBCap is a synonym to WEBRip, mostly used by non english
rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('WEB-?(?P<another>Cap)', suffix=optional(rip_suffix)),
value={'source': 'Web', 'other': 'Rip', 'another': 'Rip'})
rebulk.regex(*build_source_pattern('WEB-?DL', 'WEB-?U?HD', 'DL-?WEB', 'DL(?=-?Mux)'),
value={'source': 'Web'})
rebulk.regex('(WEB)', value='Web', tags='weak.source')
rebulk.regex(*build_source_pattern('HD-?DVD', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('HD-?DVD', suffix=optional(rip_suffix)),
value={'source': 'HD-DVD', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('Blu-?ray', 'BD', 'BD[59]', 'BD25', 'BD50', suffix=optional(rip_suffix)),
value={'source': 'Blu-ray', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('(?P<another>BR)-?(?=Scr(?:eener)?)', '(?P<another>BR)-?(?=Mux)'), # BRRip
value={'source': 'Blu-ray', 'another': 'Reencoded'})
@ -112,12 +107,12 @@ def source(config): # pylint:disable=unused-argument
rebulk.regex(*build_source_pattern('Ultra-?Blu-?ray', 'Blu-?ray-?Ultra'), value='Ultra HD Blu-ray')
rebulk.regex(*build_source_pattern('AHDTV'), value='Analog HDTV')
rebulk.regex(*build_source_pattern('UHD-?TV', suffix=rip_optional_suffix), conflict_solver=demote_other,
rebulk.regex(*build_source_pattern('UHD-?TV', suffix=optional(rip_suffix)), conflict_solver=demote_other,
value={'source': 'Ultra HDTV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('UHD', suffix=rip_suffix), conflict_solver=demote_other,
value={'source': 'Ultra HDTV', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=rip_optional_suffix),
rebulk.regex(*build_source_pattern('DSR', 'DTH', suffix=optional(rip_suffix)),
value={'source': 'Satellite', 'other': 'Rip'})
rebulk.regex(*build_source_pattern('DSR?', 'SAT', suffix=rip_suffix),
value={'source': 'Satellite', 'other': 'Rip'})
@ -145,11 +140,11 @@ class UltraHdBlurayRule(Rule):
def validate_range(cls, matches, start, end):
"""Validate no holes or invalid matches exist in the specified range."""
return (
not matches.holes(start, end, predicate=lambda m: m.value.strip(seps)) and
not matches.range(start, end, predicate=(
lambda m: not m.private and (
m.name not in ('screen_size', 'color_depth') and (
m.name != 'other' or 'uhdbluray-neighbor' not in m.tags))))
not matches.holes(start, end, predicate=lambda m: m.value.strip(seps)) and
not matches.range(start, end, predicate=(
lambda m: not m.private and (
m.name not in ('screen_size', 'color_depth') and (
m.name != 'other' or 'uhdbluray-neighbor' not in m.tags))))
)
def when(self, matches, context):

View File

@ -9,6 +9,7 @@ from rebulk import Rebulk
from rebulk.rules import Rule, RemoveMatch
from ..common.pattern import is_disabled
from ...config import load_config_patterns
from ...rules.common import seps, dash
from ...rules.common.validators import seps_before, seps_after
@ -25,22 +26,7 @@ def streaming_service(config): # pylint: disable=too-many-statements,unused-arg
rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
rebulk.defaults(name='streaming_service', tags=['source-prefix'])
regex_prefix = 're:'
for value, items in config.items():
patterns = items if isinstance(items, list) else [items]
for pattern in patterns:
if isinstance(pattern, dict):
pattern_value = pattern.pop('pattern')
kwargs = pattern
pattern = pattern_value
else:
kwargs = {}
regex = kwargs.pop('regex', False)
if regex or pattern.startswith(regex_prefix):
rebulk.regex(pattern[len(regex_prefix):], value=value, **kwargs)
else:
rebulk.string(pattern, value=value, **kwargs)
load_config_patterns(rebulk, config)
rebulk.rules(ValidateStreamingService)

View File

@ -205,7 +205,7 @@ class TitleBaseRule(Rule):
for ignored_match in ignored_matches:
if ignored_match not in to_keep:
starting = matches.chain_after(hole.start, seps,
predicate=lambda m: m == ignored_match)
predicate=lambda m, im=ignored_match: m == im)
if starting:
should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, True)
if should_keep:

View File

@ -3,7 +3,11 @@
"""
Website property.
"""
from pkg_resources import resource_stream # @UnresolvedImport
try:
from importlib.resources import files # @UnresolvedImport
except ImportError:
from importlib_resources import files # @UnresolvedImport
from rebulk.remodule import re
from rebulk import Rebulk, Rule, RemoveMatch
@ -27,11 +31,12 @@ def website(config):
rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
rebulk.defaults(name="website")
with resource_stream('guessit', 'data/tlds-alpha-by-domain.txt') as tld_file:
with files('guessit.data') as data_files:
tld_file = data_files.joinpath('tlds-alpha-by-domain.txt').read_text(encoding='utf-8')
tlds = [
tld.strip().decode('utf-8')
for tld in tld_file.readlines()
if b'--' not in tld
tld.strip()
for tld in tld_file.split('\n')
if '--' not in tld
][1:] # All registered domain extension
safe_tlds = config['safe_tlds'] # For sure a website extension
@ -40,15 +45,15 @@ def website(config):
website_prefixes = config['prefixes']
rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) +
r'\.)+(?:[a-z-0-9-]+\.)+(?:'+build_or_pattern(tlds) +
r'))(?:[^a-z0-9]|$)',
children=True)
rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_tlds) +
r'\.)*[a-z0-9-]+\.(?:'+build_or_pattern(safe_tlds) +
r'))(?:[^a-z0-9]|$)',
safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_prefix) +
r'\.)*[a-z0-9-]+\.(?:'+build_or_pattern(safe_prefix) +
r'\.)+(?:'+build_or_pattern(tlds) +
r'))(?:[^a-z0-9]|$)',
safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)

View File

@ -4690,4 +4690,69 @@
release_group: NOGPR
container: mp4
mimetype: video/mp4
type: episode
type: episode
? "Seitokai Yakuindomo - 14 OAD [BDRip 1920x1080 x264 FLAC].mkv"
: title: Seitokai Yakuindomo
episode: 14
source: Blu-ray
other: [Original Animation DVD, Rip]
screen_size: 1080p
aspect_ratio: 1.778
video_codec: H.264
audio_codec: FLAC
container: mkv
mimetype: video/x-matroska
type: episode
? "[EveTaku] Kyouso Giga ONA v2 [540p][128BAC43].mkv"
: release_group: EveTaku
title: Kyouso Giga
other: Original Net Animation
version: 2
screen_size: 540p
crc32: 128BAC43
container: mkv
mimetype: video/x-matroska
type: episode
? '[Erai-raws] Fumetsu no Anata e - 03 [720p][Multiple Subtitle].mkv'
: release_group: Erai-raws
title: Fumetsu no Anata e
episode: 3
screen_size: 720p
subtitle_language: mul
container: mkv
mimetype: video/x-matroska
type: episode
? Mom.S06E08.Jell-O.Shots.and.the.Truth.About.Santa.1080p.AMZN.WEB-DL.DDP5.1.H.264-NTb.mkv
: title: Mom
season: 6
episode: 8
episode_title: Jell-O Shots and the Truth About Santa
screen_size: 1080p
streaming_service: Amazon Prime
source: Web
audio_codec: Dolby Digital Plus
audio_channels: '5.1'
video_codec: H.264
release_group: NTb
container: mkv
mimetype: video/x-matroska
type: episode
? Archer.2009.S12E05.Shots.720p.HULU.WEB-DL.DDP5.1.H.264-NOGRP
: title: Archer
year: 2009
season: 12
episode: 5
episode_title: Shots
screen_size: 720p
streaming_service: Hulu
source: Web
audio_codec: Dolby Digital Plus
audio_channels: '5.1'
video_codec: H.264
release_group: NOGRP
type: episode

View File

@ -50,6 +50,16 @@
video_codec: Xvid
release_group: PUKKA
? Enter.the.Void.2009.2in1.1080p.BluRay.DD5.1.x264-EbP.mkv
: title: Enter the Void
year: 2009
other: 2in1
screen_size: 1080p
source: Blu-ray
audio_codec: Dolby Digital
video_codec: H.264
release_group: EbP
? "[XCT].Le.Prestige.(The.Prestige).DVDRip.[x264.HP.He-Aac.{Fr-Eng}.St{Fr-Eng}.Chaps].mkv"
: title: Le Prestige
source: DVD
@ -79,6 +89,11 @@
year: 1985
cd: 2
? Movies/Picnic.at.Hanging.Rock.1975.Criterion.Collection.1080p.BluRay.x264.DTS-WiKi
: title: Picnic at Hanging Rock
edition: Criterion
year: 1975
? Movies/Persepolis (2007)/[XCT] Persepolis [H264+Aac-128(Fr-Eng)+ST(Fr-Eng)+Ind].mkv
: title: Persepolis
year: 2007
@ -1045,6 +1060,36 @@
release_group: Seven
type: movie
? Maze.Runner.The.Scorch.Trials.OM.2015.WEB-DLRip.by.Seven
: title: Maze Runner The Scorch Trials
other: [Open Matte, Rip]
year: 2015
source: Web
release_group: Seven
type: movie
? Foo Bar 2015 Open Matte 1080p WEB-DL DD+5.1 H.264
: title: Foo Bar
year: 2015
other: Open Matte
screen_size: 1080p
source: Web
audio_codec: Dolby Digital Plus
audio_channels: '5.1'
video_codec: H.264
type: movie
? foo.bar.2015.open.matte.1080p.web-dl.dd+5.1.h.264
: title: foo bar
year: 2015
other: Open Matte
screen_size: 1080p
source: Web
audio_codec: Dolby Digital Plus
audio_channels: '5.1'
video_codec: H.264
type: movie
? Kampen Om Tungtvannet aka The Heavy Water War COMPLETE 720p x265 HEVC-Lund
: title: Kampen Om Tungtvannet aka The Heavy Water War
other: Complete
@ -1314,6 +1359,22 @@
release_group: ETRG
type: movie
? Heathers.1988.1080p.BluRay.ARROW.4K.RESTORED.Plus.Comm.DTS.x264-MaG
: title: Heathers
edition: Restored
year: 1988
screen_size: 1080p
release_group: MaG
type: movie
? The.Woman.2011.1080p.BluRay.4K.REMASTERED.Remux.AVC.DTS-HD.MA.5.1-PTP.mkv
: title: The Woman
edition: Remastered
year: 2011
screen_size: 1080p
release_group: PTP
type: movie
? Delibal 2015 720p Upscale DVDRip x264 DD5.1 AC3
: title: Delibal
year: 2015
@ -1785,3 +1846,16 @@
release_group: EVO
container: mkv
type: movie
? Kes.1969.1080p.BluRay.FLAC1.0.x264-DON.mkv
: title: Kes
year: 1969
screen_size: 1080p
source: Blu-ray
audio_codec: FLAC
audio_channels: '1.0'
video_codec: H.264
release_group: DON
container: mkv
mimetype: video/x-matroska
type: movie

View File

@ -88,6 +88,7 @@
? +stereo
: audio_channels: '2.0'
? +1.0
? +1ch
? +mono
: audio_channels: '1.0'

View File

@ -15,9 +15,9 @@
: edition: Special
? Criterion Edition
? Criterion Collection
? Edition Criterion
? CC
? -Criterion
: edition: Criterion
? Deluxe

View File

@ -1,169 +1,176 @@
# Multiple input strings having same expected results can be chained.
# Use - marker to check inputs that should not match results.
+DVDSCR:
+DVDScreener:
+DVD-SCR:
+DVD Screener:
+DVD AnythingElse Screener:
-DVD AnythingElse SCR:
other: Screener
? +DVDSCR
? +DVDScreener
? +DVD-SCR
? +DVD Screener
? +DVD AnythingElse Screener
? -DVD AnythingElse SCR
: other: Screener
+AudioFix:
+AudioFixed:
+Audio Fix:
+Audio Fixed:
other: Audio Fixed
? +AudioFix
? +AudioFixed
? +Audio Fix
? +Audio Fixed
: other: Audio Fixed
+SyncFix:
+SyncFixed:
+Sync Fix:
+Sync Fixed:
other: Sync Fixed
? +SyncFix
? +SyncFixed
? +Sync Fix
? +Sync Fixed
: other: Sync Fixed
+DualAudio:
+Dual Audio:
other: Dual Audio
? +DualAudio
? +Dual Audio
: other: Dual Audio
+ws:
+WideScreen:
+Wide Screen:
other: Widescreen
? +ws
? +WideScreen
? +Wide Screen
: other: Widescreen
# Fix must be surround by others properties to be matched.
DVD.fix.XViD:
-DVD.Fix:
-Fix.XViD:
other: Fix
? DVD.fix.XViD
? -DVD.Fix
? -Fix.XViD
: other: Fix
-proper_count: 1
-DVD.BlablaBla.Fix.Blablabla.XVID:
-DVD.BlablaBla.Fix.XVID:
-DVD.Fix.Blablabla.XVID:
other: Fix
? -DVD.BlablaBla.Fix.Blablabla.XVID
? -DVD.BlablaBla.Fix.XVID
? -DVD.Fix.Blablabla.XVID
: other: Fix
-proper_count: 1
DVD.Real.PROPER.REPACK:
other: Proper
? DVD.Real.PROPER.REPACK
: other: Proper
proper_count: 3
Proper.720p:
+Repack:
+Rerip:
other: Proper
? Proper.720p
? +Repack
? +Rerip
: other: Proper
proper_count: 1
XViD.Fansub:
other: Fan Subtitled
? XViD.Fansub
: other: Fan Subtitled
XViD.Fastsub:
other: Fast Subtitled
? XViD.Fastsub
: other: Fast Subtitled
+Season Complete:
-Complete:
other: Complete
? +Season Complete
? -Complete
: other: Complete
R5:
other: Region 5
? R5
: other: Region 5
RC:
other: Region C
? RC
: other: Region C
PreAir:
Pre Air:
other: Preair
? PreAir
? Pre Air
: other: Preair
Screener:
other: Screener
? Screener
: other: Screener
Remux:
other: Remux
? Remux
: other: Remux
Hybrid:
other: Hybrid
? Hybrid
: other: Hybrid
3D.2019:
other: 3D
? 3D.2019
: other: 3D
HD:
other: HD
? HD
: other: HD
FHD:
FullHD:
Full HD:
other: Full HD
? FHD
? FullHD
? Full HD
: other: Full HD
UHD:
Ultra:
UltraHD:
Ultra HD:
other: Ultra HD
? UHD
? Ultra
? UltraHD
? Ultra HD
: other: Ultra HD
? mHD # ??
HDLight:
other: Micro HD
? HDLight
: other: Micro HD
HQ:
other: High Quality
? HQ
: other: High Quality
hr:
other: High Resolution
? hr
: other: High Resolution
PAL:
other: PAL
? PAL
: other: PAL
SECAM:
other: SECAM
? SECAM
: other: SECAM
NTSC:
other: NTSC
? NTSC
: other: NTSC
LDTV:
other: Low Definition
? LDTV
: other: Low Definition
LD:
other: Line Dubbed
? LD
: other: Line Dubbed
MD:
other: Mic Dubbed
? MD
: other: Mic Dubbed
-The complete movie:
other: Complete
? -The complete movie
: other: Complete
+The complete movie:
title: The complete movie
? +The complete movie
: title: The complete movie
+AC3-HQ:
audio_profile: High Quality
? +AC3-HQ
: audio_profile: High Quality
Other-HQ:
other: High Quality
? Other-HQ
: other: High Quality
reenc:
re-enc:
re-encoded:
reencoded:
other: Reencoded
? reenc
? re-enc
? re-encoded
? reencoded
: other: Reencoded
CONVERT XViD:
other: Converted
? CONVERT XViD
: other: Converted
+HDRIP: # it's a Rip from non specified HD source
other: [HD, Rip]
? +HDRIP # it's a Rip from non specified HD source
: other: [HD, Rip]
SDR:
other: Standard Dynamic Range
? SDR
: other: Standard Dynamic Range
HDR:
HDR10:
-HDR100:
other: HDR10
? HDR
? HDR10
? -HDR100
: other: HDR10
BT2020:
BT.2020:
-BT.20200:
-BT.2021:
other: BT.2020
? BT2020
? BT.2020
? -BT.20200
? -BT.2021
: other: BT.2020
Upscaled:
Upscale:
other: Upscaled
? Upscaled
? Upscale
: other: Upscaled
? REPACK5
? ReRip5
: other: Proper
proper_count: 5

View File

@ -81,6 +81,15 @@
? +852x480i
: screen_size: 480i
? +540p
? +540px
? -540i
? -540
: screen_size: 540p
? +540i
: screen_size: 540i
? +576p
? +576px
? -576i

View File

@ -21,3 +21,6 @@
? Dark.Net.S01E06.720p.HDTV.x264-BATV
Dark.Net.2015.720p.HDTV.x264-BATV
: title: Dark Net
? www.4MovieRulz.be - Ginny Weds Sunny (2020) 1080p Hindi Proper HDRip x264 DD5.1 - 2.4GB ESub.mkv
: website: www.4MovieRulz.be

File diff suppressed because it is too large Load Diff

View File

@ -6,8 +6,10 @@ import os
from pathlib import Path
import pytest
from pytest_mock import MockerFixture
from ..api import guessit, properties, suggested_expected, GuessitException
from .. import api
from ..api import guessit, properties, suggested_expected, GuessitException, default_api
__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
@ -62,7 +64,53 @@ def test_exception():
def test_suggested_expected():
with open(os.path.join(__location__, 'suggested.json'), 'r') as f:
with open(os.path.join(__location__, 'suggested.json'), 'r', encoding='utf-8') as f:
content = json.load(f)
actual = suggested_expected(content['titles'])
assert actual == content['suggested']
def test_should_rebuild_rebulk_on_advanced_config_change(mocker: MockerFixture):
api.reset()
rebulk_builder_spy = mocker.spy(api, 'rebulk_builder')
string = "some.movie.trfr.mkv"
result1 = default_api.guessit(string)
assert result1.get('title') == 'some movie trfr'
assert 'subtitle_language' not in result1
rebulk_builder_spy.assert_called_once_with(mocker.ANY)
rebulk_builder_spy.reset_mock()
result2 = default_api.guessit(string, {'advanced_config': {'language': {'subtitle_prefixes': ['tr']}}})
assert result2.get('title') == 'some movie'
assert str(result2.get('subtitle_language')) == 'fr'
rebulk_builder_spy.assert_called_once_with(mocker.ANY)
rebulk_builder_spy.reset_mock()
def test_should_not_rebuild_rebulk_on_same_advanced_config(mocker: MockerFixture):
api.reset()
rebulk_builder_spy = mocker.spy(api, 'rebulk_builder')
string = "some.movie.subfr.mkv"
result1 = default_api.guessit(string)
assert result1.get('title') == 'some movie'
assert str(result1.get('subtitle_language')) == 'fr'
rebulk_builder_spy.assert_called_once_with(mocker.ANY)
rebulk_builder_spy.reset_mock()
result2 = default_api.guessit(string)
assert result2.get('title') == 'some movie'
assert str(result2.get('subtitle_language')) == 'fr'
assert rebulk_builder_spy.call_count == 0
rebulk_builder_spy.reset_mock()

View File

@ -16,7 +16,7 @@ __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file
# Prevent output from spamming the console
@pytest.fixture(scope="function", autouse=True)
def no_stdout(monkeypatch):
with open(os.devnull, "w") as f:
with open(os.devnull, "w") as f: # pylint:disable=unspecified-encoding
monkeypatch.setattr(sys, "stdout", f)
yield

View File

@ -3,8 +3,6 @@
# pylint: disable=no-self-use, pointless-statement, missing-docstring, invalid-name
import logging
import os
# io.open supports encoding= in python 2.7
from io import open # pylint: disable=redefined-builtin
import babelfish
import yaml # pylint:disable=wrong-import-order
@ -52,16 +50,16 @@ class EntryResult(object):
if self.ok:
return self.string + ': OK!'
if self.warning:
return '%s%s: WARNING! (valid=%i, extra=%s)' % ('-' if self.negates else '', self.string, len(self.valid),
self.extra)
return f'{"-" if self.negates else ""}{self.string}: ' \
f'WARNING! (valid={len(self.valid)}, extra={self.extra})'
if self.error:
return '%s%s: ERROR! (valid=%i, extra=%s, missing=%s, different=%s, others=%s)' % \
('-' if self.negates else '', self.string, len(self.valid), self.extra, self.missing,
self.different, self.others)
return f'{"-" if self.negates else ""}{self.string}: ' \
f'ERROR! (valid={len(self.valid)}, extra={self.extra}, ' \
f'missing={self.missing}, different={self.different}, others={self.others})'
return '%s%s: UNKOWN! (valid=%i, extra=%s, missing=%s, different=%s, others=%s)' % \
('-' if self.negates else '', self.string, len(self.valid), self.extra, self.missing, self.different,
self.others)
return f'{"-" if self.negates else ""}{self.string}: ' \
f'UNKOWN! (valid={len(self.valid)}, extra={self.extra}, ' \
f'missing={self.missing}, different={self.different}, others={self.others})'
@property
def details(self):

View File

@ -5,8 +5,8 @@ Options
"""
from collections import OrderedDict
import babelfish
import babelfish
import yaml # pylint:disable=wrong-import-order
from .rules.common.quantity import BitRate, FrameRate, Size
@ -35,7 +35,7 @@ class OrderedDictYAMLLoader(yaml.SafeLoader):
self.flatten_mapping(node)
else: # pragma: no cover
raise yaml.constructor.ConstructorError(None, None,
'expected a mapping node, but found %s' % node.id, node.start_mark)
f'expected a mapping node, but found {node.id}', node.start_mark)
mapping = OrderedDict()
for key_node, value_node in node.value:
@ -44,8 +44,8 @@ class OrderedDictYAMLLoader(yaml.SafeLoader):
hash(key)
except TypeError as exc: # pragma: no cover
raise yaml.constructor.ConstructorError('while constructing a mapping',
node.start_mark, 'found unacceptable key (%s)'
% exc, key_node.start_mark)
node.start_mark, f'found unacceptable key ({exc})'
, key_node.start_mark)
value = self.construct_object(value_node, deep=deep)
mapping[key] = value
return mapping

View File

@ -0,0 +1,36 @@
"""Read resources contained within a package."""
from ._common import (
as_file,
files,
Package,
)
from ._legacy import (
contents,
open_binary,
read_binary,
open_text,
read_text,
is_resource,
path,
Resource,
)
from importlib_resources.abc import ResourceReader
__all__ = [
'Package',
'Resource',
'ResourceReader',
'as_file',
'contents',
'files',
'is_resource',
'open_binary',
'open_text',
'path',
'read_binary',
'read_text',
]

View File

@ -0,0 +1,170 @@
from contextlib import suppress
from io import TextIOWrapper
from . import abc
class SpecLoaderAdapter:
    """
    Wrap a module spec so that the loader it exposes can be swapped out.

    Only ``spec`` and ``loader`` live on the adapter itself; every other
    attribute lookup falls through to the wrapped spec.
    """

    def __init__(self, spec, adapter=lambda spec: spec.loader):
        self.spec = spec
        # ``adapter`` derives the (possibly replaced) loader from the spec;
        # the default simply reuses the spec's own loader.
        self.loader = adapter(spec)

    def __getattr__(self, name):
        # Invoked only when normal attribute lookup fails, i.e. for
        # everything except ``spec``/``loader`` -- delegate to the spec.
        return getattr(self.spec, name)
class TraversableResourcesLoader:
    """
    Adapt a loader to provide TraversableResources.
    """

    def __init__(self, spec):
        # Module spec whose loader/reader is being adapted.
        self.spec = spec

    def get_resource_reader(self, name):
        # _native() hands back the loader's own reader when that reader
        # already supports files(); otherwise the CompatibilityFiles
        # wrapper itself acts as the reader.
        return CompatibilityFiles(self.spec)._native()
def _io_wrapper(file, mode='r', *args, **kwargs):
if mode == 'r':
return TextIOWrapper(file, *args, **kwargs)
elif mode == 'rb':
return file
raise ValueError(
"Invalid mode value '{}', only 'r' and 'rb' are supported".format(mode)
)
class CompatibilityFiles:
    """
    Adapter for an existing or non-existent resource reader
    to provide a compatibility .files().
    """

    class SpecPath(abc.Traversable):
        """
        Path tied to a module spec.
        Can be read and exposes the resource reader children.
        """

        def __init__(self, spec, reader):
            self._spec = spec
            self._reader = reader

        def iterdir(self):
            # Without a reader there is nothing to enumerate.
            if not self._reader:
                return iter(())
            return iter(
                CompatibilityFiles.ChildPath(self._reader, path)
                for path in self._reader.contents()
            )

        def is_file(self):
            # A spec path represents the package itself, never a file.
            return False

        is_dir = is_file

        def joinpath(self, other):
            # Without a reader the child cannot be resolved -> orphan.
            if not self._reader:
                return CompatibilityFiles.OrphanPath(other)
            return CompatibilityFiles.ChildPath(self._reader, other)

        @property
        def name(self):
            return self._spec.name

        def open(self, mode='r', *args, **kwargs):
            return _io_wrapper(self._reader.open_resource(None), mode, *args, **kwargs)

    class ChildPath(abc.Traversable):
        """
        Path tied to a resource reader child.
        Can be read but doesn't expose any meaningful children.
        """

        def __init__(self, reader, name):
            self._reader = reader
            self._name = name

        def iterdir(self):
            return iter(())

        def is_file(self):
            return self._reader.is_resource(self.name)

        def is_dir(self):
            return not self.is_file()

        def joinpath(self, other):
            # Grandchildren are unreachable through a ResourceReader.
            return CompatibilityFiles.OrphanPath(self.name, other)

        @property
        def name(self):
            return self._name

        def open(self, mode='r', *args, **kwargs):
            return _io_wrapper(
                self._reader.open_resource(self.name), mode, *args, **kwargs
            )

    class OrphanPath(abc.Traversable):
        """
        Orphan path, not tied to a module spec or resource reader.
        Can't be read and doesn't expose any meaningful children.
        """

        def __init__(self, *path_parts):
            if len(path_parts) < 1:
                raise ValueError('Need at least one path part to construct a path')
            self._path = path_parts

        def iterdir(self):
            return iter(())

        def is_file(self):
            return False

        is_dir = is_file

        def joinpath(self, other):
            # Joining only accumulates parts; nothing becomes readable.
            return CompatibilityFiles.OrphanPath(*self._path, other)

        @property
        def name(self):
            return self._path[-1]

        def open(self, mode='r', *args, **kwargs):
            raise FileNotFoundError("Can't open orphan path")

    def __init__(self, spec):
        self.spec = spec

    @property
    def _reader(self):
        # AttributeError means the loader offers no resource reader;
        # the property then returns None implicitly.
        with suppress(AttributeError):
            return self.spec.loader.get_resource_reader(self.spec.name)

    def _native(self):
        """
        Return the native reader if it supports files().
        """
        reader = self._reader
        return reader if hasattr(reader, 'files') else self

    def __getattr__(self, attr):
        # All other reader methods are delegated to the underlying reader.
        return getattr(self._reader, attr)

    def files(self):
        return CompatibilityFiles.SpecPath(self.spec, self._reader)
def wrap_spec(package):
    """
    Construct a package spec with traversable compatibility
    on the spec/loader/reader.
    """
    spec = package.__spec__
    return SpecLoaderAdapter(spec, TraversableResourcesLoader)

View File

@ -0,0 +1,104 @@
import os
import pathlib
import tempfile
import functools
import contextlib
import types
import importlib
from typing import Union, Optional
from .abc import ResourceReader, Traversable
from ._compat import wrap_spec
Package = Union[types.ModuleType, str]
def files(package):
    # type: (Package) -> Traversable
    """
    Get a Traversable resource from a package
    """
    # Resolve the name/module to a verified package first, then hand it
    # to the spec's reader for a Traversable root.
    resolved = get_package(package)
    return from_package(resolved)
def get_resource_reader(package):
    # type: (types.ModuleType) -> Optional[ResourceReader]
    """
    Return the package's loader if it's a ResourceReader.
    """
    # An issubclass() check is deliberately avoided: abc's
    # __subclasscheck__() hook wants to create a weak reference to the
    # object, but zipimport.zipimporter does not support weak references,
    # which raises TypeError. Duck-type the hook instead.
    spec = package.__spec__
    hook = getattr(spec.loader, 'get_resource_reader', None)  # type: ignore
    if hook is None:
        return None
    return hook(spec.name)  # type: ignore
def resolve(cand):
    # type: (Package) -> types.ModuleType
    """Return *cand* itself if it is already a module, else import it by name."""
    if isinstance(cand, types.ModuleType):
        return cand
    return importlib.import_module(cand)
def get_package(package):
    # type: (Package) -> types.ModuleType
    """Take a package name or module object and return the module.

    Raise an exception if the resolved module is not a package.
    """
    module = resolve(package)
    # Only packages carry submodule_search_locations; plain modules don't.
    if wrap_spec(module).submodule_search_locations is None:
        raise TypeError(f'{package!r} is not a package')
    return module
def from_package(package):
    """
    Return a Traversable object for the given package.
    """
    # The wrapped spec guarantees a reader whose files() yields a
    # Traversable, regardless of the native loader's capabilities.
    adapted = wrap_spec(package)
    reader = adapted.loader.get_resource_reader(adapted.name)
    return reader.files()
@contextlib.contextmanager
def _tempfile(reader, suffix=''):
# Not using tempfile.NamedTemporaryFile as it leads to deeper 'try'
# blocks due to the need to close the temporary file to work on Windows
# properly.
fd, raw_path = tempfile.mkstemp(suffix=suffix)
try:
try:
os.write(fd, reader())
finally:
os.close(fd)
del reader
yield pathlib.Path(raw_path)
finally:
try:
os.remove(raw_path)
except FileNotFoundError:
pass
@functools.singledispatch
def as_file(path):
    """
    Given a Traversable object, return that object as a
    path on the local file system in a context manager.
    """
    # Generic case: spill the traversable's bytes into a real temp file,
    # preserving the original file name as the suffix.
    return _tempfile(path.read_bytes, suffix=path.name)


@as_file.register(pathlib.Path)
@contextlib.contextmanager
def _(path):
    """
    Degenerate behavior for pathlib.Path objects.
    """
    # Already a concrete filesystem path -- hand it back untouched.
    yield path

View File

@ -0,0 +1,98 @@
# flake8: noqa
import abc
import sys
import pathlib
from contextlib import suppress
if sys.version_info >= (3, 10):
from zipfile import Path as ZipPath # type: ignore
else:
from zipp import Path as ZipPath # type: ignore
try:
from typing import runtime_checkable # type: ignore
except ImportError:
def runtime_checkable(cls): # type: ignore
return cls
try:
from typing import Protocol # type: ignore
except ImportError:
Protocol = abc.ABC # type: ignore
class TraversableResourcesLoader:
    """
    Adapt loaders to provide TraversableResources and other
    compatibility.

    Used primarily for Python 3.9 and earlier where the native
    loaders do not yet implement TraversableResources.
    """

    def __init__(self, spec):
        self.spec = spec

    @property
    def path(self):
        # Filesystem origin of the module, if any (may be None for
        # built-in/frozen modules).
        return self.spec.origin

    def get_resource_reader(self, name):
        # Imported lazily to avoid circular imports at module load time.
        from . import readers, _adapters

        def _zip_reader(spec):
            # AttributeError: loader is not a zipimporter -> None.
            with suppress(AttributeError):
                return readers.ZipReader(spec.loader, spec.name)

        def _namespace_reader(spec):
            # AttributeError/ValueError: not a namespace package -> None.
            with suppress(AttributeError, ValueError):
                return readers.NamespaceReader(spec.submodule_search_locations)

        def _available_reader(spec):
            with suppress(AttributeError):
                return spec.loader.get_resource_reader(spec.name)

        def _native_reader(spec):
            # Accept the loader's own reader only when it supports files().
            reader = _available_reader(spec)
            return reader if hasattr(reader, 'files') else None

        def _file_reader(spec):
            try:
                path = pathlib.Path(self.path)
            except TypeError:
                # origin is None -> the module is not file-backed.
                return None
            if path.exists():
                return readers.FileReader(self)

        # The ``or`` chain below fixes the precedence of reader strategies;
        # the order is significant.
        return (
            # native reader if it supplies 'files'
            _native_reader(self.spec)
            or
            # local ZipReader if a zip module
            _zip_reader(self.spec)
            or
            # local NamespaceReader if a namespace module
            _namespace_reader(self.spec)
            or
            # local FileReader
            _file_reader(self.spec)
            # fallback - adapt the spec ResourceReader to TraversableReader
            or _adapters.CompatibilityFiles(self.spec)
        )
def wrap_spec(package):
    """
    Construct a package spec with traversable compatibility
    on the spec/loader/reader.

    Supersedes _adapters.wrap_spec to use TraversableResourcesLoader
    from above for older Python compatibility (<3.10).
    """
    from . import _adapters

    spec = package.__spec__
    return _adapters.SpecLoaderAdapter(spec, TraversableResourcesLoader)

View File

@ -0,0 +1,35 @@
from itertools import filterfalse
from typing import (
Callable,
Iterable,
Iterator,
Optional,
Set,
TypeVar,
Union,
)
# Type and type variable definitions
_T = TypeVar('_T')
_U = TypeVar('_U')


def unique_everseen(
    iterable: Iterable[_T], key: Optional[Callable[[_T], _U]] = None
) -> Iterator[_T]:
    "List unique elements, preserving order. Remember all elements ever seen."
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    seen: Set[Union[_T, _U]] = set()
    for element in iterable:
        # Deduplicate on the raw element, or on key(element) when given.
        marker = element if key is None else key(element)
        if marker not in seen:
            seen.add(marker)
            yield element

View File

@ -0,0 +1,121 @@
import functools
import os
import pathlib
import types
import warnings
from typing import Union, Iterable, ContextManager, BinaryIO, TextIO, Any
from . import _common
Package = Union[types.ModuleType, str]
Resource = str
def deprecated(func):
    """Decorator emitting a DeprecationWarning steering callers to files()."""

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # stacklevel=2 points the warning at the caller, not this wrapper.
        message = (
            f"{func.__name__} is deprecated. Use files() instead. "
            "Refer to https://importlib-resources.readthedocs.io"
            "/en/latest/using.html#migrating-from-legacy for migration advice."
        )
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return func(*args, **kwargs)

    return wrapper
def normalize_path(path):
    # type: (Any) -> str
    """Normalize a path by ensuring it is a string.

    If the resulting string contains path separators, an exception is raised.
    """
    as_text = str(path)
    parent, file_name = os.path.split(as_text)
    if parent:
        # A directory component means this is not a bare resource name.
        raise ValueError(f'{path!r} must be only a file name')
    return file_name
@deprecated
def open_binary(package: Package, resource: Resource) -> BinaryIO:
    """Return a file-like object opened for binary reading of the resource."""
    # ``resource`` must be a bare file name; normalize_path raises otherwise.
    return (_common.files(package) / normalize_path(resource)).open('rb')
@deprecated
def read_binary(package: Package, resource: Resource) -> bytes:
    """Return the binary contents of the resource."""
    # ``resource`` must be a bare file name; normalize_path raises otherwise.
    return (_common.files(package) / normalize_path(resource)).read_bytes()
@deprecated
def open_text(
    package: Package,
    resource: Resource,
    encoding: str = 'utf-8',
    errors: str = 'strict',
) -> TextIO:
    """Return a file-like object opened for text reading of the resource."""
    # encoding/errors are forwarded to the Traversable's open() call.
    return (_common.files(package) / normalize_path(resource)).open(
        'r', encoding=encoding, errors=errors
    )
@deprecated
def read_text(
    package: Package,
    resource: Resource,
    encoding: str = 'utf-8',
    errors: str = 'strict',
) -> str:
    """Return the decoded string of the resource.

    The decoding-related arguments have the same semantics as those of
    bytes.decode().
    """
    # Delegate to open_text; the with-block guarantees the handle closes.
    with open_text(package, resource, encoding, errors) as fp:
        return fp.read()
@deprecated
def contents(package: Package) -> Iterable[str]:
    """Return an iterable of entries in `package`.

    Note that not all entries are resources. Specifically, directories are
    not considered resources. Use `is_resource()` on each entry returned here
    to check if it is a resource or not.
    """
    # Materialized as a list so the result can be iterated repeatedly.
    return [path.name for path in _common.files(package).iterdir()]
@deprecated
def is_resource(package: Package, name: str) -> bool:
    """True if `name` is a resource inside `package`.

    Directories are *not* resources.
    """
    resource = normalize_path(name)
    # Compare against each direct child; only files count as resources.
    return any(
        traversable.name == resource and traversable.is_file()
        for traversable in _common.files(package).iterdir()
    )
@deprecated
def path(
    package: Package,
    resource: Resource,
) -> ContextManager[pathlib.Path]:
    """A context manager providing a file path object to the resource.

    If the resource does not already exist on its own on the file system,
    a temporary file will be created. If the file was created, the file
    will be deleted upon exiting the context manager (no exception is
    raised if the file was deleted prior to the context manager
    exiting).
    """
    # as_file dispatches: real filesystem paths pass through untouched,
    # packed resources are extracted to a temporary file.
    return _common.as_file(_common.files(package) / normalize_path(resource))

View File

@ -0,0 +1,137 @@
import abc
from typing import BinaryIO, Iterable, Text
from ._compat import runtime_checkable, Protocol
class ResourceReader(metaclass=abc.ABCMeta):
    """Abstract base class for loaders to provide resource reading support."""

    @abc.abstractmethod
    def open_resource(self, resource: Text) -> BinaryIO:
        """Return an opened, file-like object for binary reading.

        The 'resource' argument is expected to represent only a file name.
        If the resource cannot be found, FileNotFoundError is raised.
        """
        # This deliberately raises FileNotFoundError instead of
        # NotImplementedError so that if this method is accidentally called,
        # it'll still do the right thing.
        raise FileNotFoundError

    @abc.abstractmethod
    def resource_path(self, resource: Text) -> Text:
        """Return the file system path to the specified resource.

        The 'resource' argument is expected to represent only a file name.
        If the resource does not exist on the file system, raise
        FileNotFoundError.
        """
        # This deliberately raises FileNotFoundError instead of
        # NotImplementedError so that if this method is accidentally called,
        # it'll still do the right thing.
        raise FileNotFoundError

    @abc.abstractmethod
    def is_resource(self, path: Text) -> bool:
        """Return True if the named 'path' is a resource.

        Files are resources, directories are not.
        """
        # Same rationale as above: FileNotFoundError keeps accidental
        # calls on an abstract body safe.
        raise FileNotFoundError

    @abc.abstractmethod
    def contents(self) -> Iterable[str]:
        """Return an iterable of entries in `package`."""
        # Same rationale as above.
        raise FileNotFoundError
@runtime_checkable
class Traversable(Protocol):
    """
    An object with a subset of pathlib.Path methods suitable for
    traversing directories and opening files.
    """
    @abc.abstractmethod
    def iterdir(self):
        """
        Yield Traversable objects in self
        """
    def read_bytes(self):
        """
        Read contents of self as bytes
        """
        with self.open('rb') as strm:
            return strm.read()
    def read_text(self, encoding=None):
        """
        Read contents of self as text
        """
        with self.open(encoding=encoding) as strm:
            return strm.read()
    @abc.abstractmethod
    def is_dir(self) -> bool:
        """
        Return True if self is a directory
        """
    @abc.abstractmethod
    def is_file(self) -> bool:
        """
        Return True if self is a file
        """
    @abc.abstractmethod
    def joinpath(self, child):
        """
        Return Traversable child in self
        """
    def __truediv__(self, child):
        """
        Return Traversable child in self
        """
        return self.joinpath(child)
    @abc.abstractmethod
    def open(self, mode='r', *args, **kwargs):
        """
        mode may be 'r' or 'rb' to open as text or binary. Return a handle
        suitable for reading (same as pathlib.Path.open).
        When opening as text, accepts encoding parameters such as those
        accepted by io.TextIOWrapper.
        """
    # `abc.abstractproperty` has been deprecated since Python 3.3; the
    # documented equivalent is stacking @property over @abc.abstractmethod.
    @property
    @abc.abstractmethod
    def name(self) -> str:
        """
        The base name of this object without any parent references.
        """
class TraversableResources(ResourceReader):
    """
    The required interface for providing traversable
    resources.
    """
    @abc.abstractmethod
    def files(self):
        """Return a Traversable object for the loaded package."""
    def open_resource(self, resource):
        # Resolve the named resource under the package root and open it
        # for binary reading.
        return (self.files() / resource).open('rb')
    def resource_path(self, resource):
        # No stable filesystem path is guaranteed for a traversable
        # resource, so always signal absence.
        raise FileNotFoundError(resource)
    def is_resource(self, path):
        # A resource is an entry directly under the package that is a file.
        return (self.files() / path).is_file()
    def contents(self):
        # Yield the names of all entries in the package.
        for entry in self.files().iterdir():
            yield entry.name

View File

View File

@ -0,0 +1,122 @@
import collections
import pathlib
import operator
from . import abc
from ._itertools import unique_everseen
from ._compat import ZipPath
def remove_duplicates(items):
    """Return an iterator over *items* with duplicates dropped,
    preserving first-seen order."""
    seen = collections.OrderedDict()
    for item in items:
        seen.setdefault(item, None)
    return iter(seen)
class FileReader(abc.TraversableResources):
    """Resource reader anchored at the directory containing the loaded module."""
    def __init__(self, loader):
        self.path = pathlib.Path(loader.path).parent
    def resource_path(self, resource):
        """
        Return the file system path to prevent
        `resources.path()` from creating a temporary
        copy.
        """
        return str(self.path / resource)
    def files(self):
        # The package directory itself is the Traversable root.
        return self.path
class ZipReader(abc.TraversableResources):
    """Resource reader for packages imported from a zip archive."""
    def __init__(self, loader, module):
        # Only the final dotted segment names the package directory
        # inside the archive.
        leaf = module.rpartition('.')[2]
        self.prefix = loader.prefix.replace('\\', '/') + leaf + '/'
        self.archive = loader.archive
    def open_resource(self, resource):
        try:
            return super().open_resource(resource)
        except KeyError as exc:
            # zipfile reports a missing member as KeyError; translate it
            # to the FileNotFoundError the API promises.
            raise FileNotFoundError(exc.args[0])
    def is_resource(self, path):
        # workaround for `zipfile.Path.is_file` returning true
        # for non-existent paths.
        candidate = self.files().joinpath(path)
        return candidate.exists() and candidate.is_file()
    def files(self):
        return ZipPath(self.archive, self.prefix)
class MultiplexedPath(abc.Traversable):
    """
    Given a series of Traversable objects, implement a merged
    version of the interface across all objects. Useful for
    namespace packages which may be multihomed at a single
    name.
    """
    def __init__(self, *paths):
        self._paths = [pathlib.Path(entry) for entry in remove_duplicates(paths)]
        if not self._paths:
            raise FileNotFoundError('MultiplexedPath must contain at least one path')
        if not all(entry.is_dir() for entry in self._paths):
            raise NotADirectoryError('MultiplexedPath only supports directories')
    def iterdir(self):
        # Merge children across all paths, keeping the first entry seen
        # for any given name.
        children = (entry for base in self._paths for entry in base.iterdir())
        return unique_everseen(children, key=operator.attrgetter('name'))
    def read_bytes(self):
        raise FileNotFoundError(f'{self} is not a file')
    def read_text(self, *args, **kwargs):
        raise FileNotFoundError(f'{self} is not a file')
    def is_dir(self):
        return True
    def is_file(self):
        return False
    def joinpath(self, child):
        # Prefer an existing child from any of the merged paths; otherwise
        # construct a (possibly nonexistent) child of the first path.
        matches = (entry for entry in self.iterdir() if entry.name == child)
        fallback = self._paths[0] / child
        return next(matches, fallback)
    __truediv__ = joinpath
    def open(self, *args, **kwargs):
        raise FileNotFoundError(f'{self} is not a file')
    @property
    def name(self):
        return self._paths[0].name
    def __repr__(self):
        listed = ', '.join(f"'{entry}'" for entry in self._paths)
        return f'MultiplexedPath({listed})'
class NamespaceReader(abc.TraversableResources):
    """Resource reader over a namespace package's multiple search locations."""
    def __init__(self, namespace_path):
        # Guard against being handed something other than a _NamespacePath.
        if 'NamespacePath' not in str(namespace_path):
            raise ValueError('Invalid path')
        self.path = MultiplexedPath(*namespace_path)
    def resource_path(self, resource):
        """
        Return the file system path to prevent
        `resources.path()` from creating a temporary
        copy.
        """
        return str(self.path.joinpath(resource))
    def files(self):
        return self.path

View File

@ -0,0 +1,116 @@
"""
Interface adapters for low-level readers.
"""
import abc
import io
import itertools
from typing import BinaryIO, List
from .abc import Traversable, TraversableResources
class SimpleReader(abc.ABC):
    """
    The minimum, low-level interface required from a resource
    provider.
    """
    # `abc.abstractproperty` has been deprecated since Python 3.3; the
    # documented equivalent is stacking @property over @abc.abstractmethod.
    @property
    @abc.abstractmethod
    def package(self):
        # type: () -> str
        """
        The name of the package for which this reader loads resources.
        """
    @abc.abstractmethod
    def children(self):
        # type: () -> List['SimpleReader']
        """
        Obtain an iterable of SimpleReader for available
        child containers (e.g. directories).
        """
    @abc.abstractmethod
    def resources(self):
        # type: () -> List[str]
        """
        Obtain available named resources for this virtual package.
        """
    @abc.abstractmethod
    def open_binary(self, resource):
        # type: (str) -> BinaryIO
        """
        Obtain a File-like for a named resource.
        """
    @property
    def name(self):
        """The last dotted segment of the package name."""
        return self.package.split('.')[-1]
class ResourceHandle(Traversable):
    """
    Handle to a named resource in a ResourceReader.
    """
    def __init__(self, parent, name):
        # type: (ResourceContainer, str) -> None
        self.parent = parent
        self.name = name  # type: ignore
    def is_file(self):
        return True
    def is_dir(self):
        return False
    def open(self, mode='r', *args, **kwargs):
        """Open the resource: the raw binary stream for 'rb', or a
        TextIOWrapper around it for text modes."""
        stream = self.parent.reader.open_binary(self.name)
        if 'b' not in mode:
            # BUG FIX: the binary stream must be passed as the wrapper's
            # buffer; previously it was dropped (TextIOWrapper(*args, **kwargs)),
            # yielding a wrapper with no underlying stream for text mode.
            stream = io.TextIOWrapper(stream, *args, **kwargs)
        return stream
    def joinpath(self, name):
        # A resource is a leaf; it has no children to traverse into.
        raise RuntimeError("Cannot traverse into a resource")
class ResourceContainer(Traversable):
    """
    Traversable container for a package's resources via its reader.
    """
    def __init__(self, reader):
        # type: (SimpleReader) -> None
        self.reader = reader
    def is_dir(self):
        return True
    def is_file(self):
        return False
    def iterdir(self):
        # NOTE(review): `self.reader.resources` is read without calling it,
        # although SimpleReader declares `resources` as an abstract method —
        # implementations presumably expose it as a property; confirm.
        files = (ResourceHandle(self, name) for name in self.reader.resources)
        dirs = map(ResourceContainer, self.reader.children())
        return itertools.chain(files, dirs)
    def open(self, *args, **kwargs):
        # Containers (directories) cannot be opened as files.
        raise IsADirectoryError()
    def joinpath(self, name):
        # Raises StopIteration if no child with the given name exists.
        return next(
            traversable for traversable in self.iterdir() if traversable.name == name
        )
class TraversableReader(TraversableResources, SimpleReader):
    """
    A TraversableResources based on SimpleReader. Resource providers
    may derive from this class to provide the TraversableResources
    interface by supplying the SimpleReader interface.
    """
    def files(self):
        # Adapt this SimpleReader into the Traversable hierarchy.
        return ResourceContainer(self)

View File

@ -0,0 +1,19 @@
import os
try:
    from test.support import import_helper # type: ignore
except ImportError:
    # Python 3.9 and earlier
    class import_helper: # type: ignore
        # Namespace shim exposing the pre-3.10 locations of these helpers.
        from test.support import modules_setup, modules_cleanup
try:
    # Python 3.10
    from test.support.os_helper import unlink
except ImportError:
    from test.support import unlink as _unlink
    def unlink(target):
        # Coerce path-like objects via os.fspath — presumably because the
        # older unlink signature did not accept them; confirm against 3.9.
        return _unlink(os.fspath(target))

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
Hello, UTF-8 world!

View File

@ -0,0 +1 @@
one resource

View File

@ -0,0 +1 @@
two resource

View File

@ -0,0 +1 @@
Hello, UTF-8 world!

View File

@ -0,0 +1,102 @@
import io
import unittest
import importlib_resources as resources
from importlib_resources._adapters import (
CompatibilityFiles,
wrap_spec,
)
from . import util
class CompatibilityFilesTests(unittest.TestCase):
@property
def package(self):
bytes_data = io.BytesIO(b'Hello, world!')
return util.create_package(
file=bytes_data,
path='some_path',
contents=('a', 'b', 'c'),
)
@property
def files(self):
return resources.files(self.package)
def test_spec_path_iter(self):
self.assertEqual(
sorted(path.name for path in self.files.iterdir()),
['a', 'b', 'c'],
)
def test_child_path_iter(self):
self.assertEqual(list((self.files / 'a').iterdir()), [])
def test_orphan_path_iter(self):
self.assertEqual(list((self.files / 'a' / 'a').iterdir()), [])
self.assertEqual(list((self.files / 'a' / 'a' / 'a').iterdir()), [])
def test_spec_path_is(self):
self.assertFalse(self.files.is_file())
self.assertFalse(self.files.is_dir())
def test_child_path_is(self):
self.assertTrue((self.files / 'a').is_file())
self.assertFalse((self.files / 'a').is_dir())
def test_orphan_path_is(self):
self.assertFalse((self.files / 'a' / 'a').is_file())
self.assertFalse((self.files / 'a' / 'a').is_dir())
self.assertFalse((self.files / 'a' / 'a' / 'a').is_file())
self.assertFalse((self.files / 'a' / 'a' / 'a').is_dir())
def test_spec_path_name(self):
self.assertEqual(self.files.name, 'testingpackage')
def test_child_path_name(self):
self.assertEqual((self.files / 'a').name, 'a')
def test_orphan_path_name(self):
self.assertEqual((self.files / 'a' / 'b').name, 'b')
self.assertEqual((self.files / 'a' / 'b' / 'c').name, 'c')
def test_spec_path_open(self):
self.assertEqual(self.files.read_bytes(), b'Hello, world!')
self.assertEqual(self.files.read_text(), 'Hello, world!')
def test_child_path_open(self):
self.assertEqual((self.files / 'a').read_bytes(), b'Hello, world!')
self.assertEqual((self.files / 'a').read_text(), 'Hello, world!')
def test_orphan_path_open(self):
with self.assertRaises(FileNotFoundError):
(self.files / 'a' / 'b').read_bytes()
with self.assertRaises(FileNotFoundError):
(self.files / 'a' / 'b' / 'c').read_bytes()
def test_open_invalid_mode(self):
with self.assertRaises(ValueError):
self.files.open('0')
def test_orphan_path_invalid(self):
with self.assertRaises(ValueError):
CompatibilityFiles.OrphanPath()
def test_wrap_spec(self):
spec = wrap_spec(self.package)
self.assertIsInstance(spec.loader.get_resource_reader(None), CompatibilityFiles)
class CompatibilityFilesNoReaderTests(unittest.TestCase):
@property
def package(self):
return util.create_package_from_loader(None)
@property
def files(self):
return resources.files(self.package)
def test_spec_path_joinpath(self):
self.assertIsInstance(self.files / 'a', CompatibilityFiles.OrphanPath)

View File

@ -0,0 +1,43 @@
import unittest
import importlib_resources as resources
from . import data01
from . import util
class ContentsTests:
expected = {
'__init__.py',
'binary.file',
'subdirectory',
'utf-16.file',
'utf-8.file',
}
def test_contents(self):
contents = {path.name for path in resources.files(self.data).iterdir()}
assert self.expected <= contents
class ContentsDiskTests(ContentsTests, unittest.TestCase):
def setUp(self):
self.data = data01
class ContentsZipTests(ContentsTests, util.ZipSetup, unittest.TestCase):
pass
class ContentsNamespaceTests(ContentsTests, unittest.TestCase):
expected = {
# no __init__ because of namespace design
# no subdirectory as incidental difference in fixture
'binary.file',
'utf-16.file',
'utf-8.file',
}
def setUp(self):
from . import namespacedata01
self.data = namespacedata01

View File

@ -0,0 +1,46 @@
import typing
import unittest
import importlib_resources as resources
from importlib_resources.abc import Traversable
from . import data01
from . import util
class FilesTests:
def test_read_bytes(self):
files = resources.files(self.data)
actual = files.joinpath('utf-8.file').read_bytes()
assert actual == b'Hello, UTF-8 world!\n'
def test_read_text(self):
files = resources.files(self.data)
actual = files.joinpath('utf-8.file').read_text(encoding='utf-8')
assert actual == 'Hello, UTF-8 world!\n'
@unittest.skipUnless(
hasattr(typing, 'runtime_checkable'),
"Only suitable when typing supports runtime_checkable",
)
def test_traversable(self):
assert isinstance(resources.files(self.data), Traversable)
class OpenDiskTests(FilesTests, unittest.TestCase):
def setUp(self):
self.data = data01
class OpenZipTests(FilesTests, util.ZipSetup, unittest.TestCase):
pass
class OpenNamespaceTests(FilesTests, unittest.TestCase):
def setUp(self):
from . import namespacedata01
self.data = namespacedata01
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,81 @@
import unittest
import importlib_resources as resources
from . import data01
from . import util
class CommonBinaryTests(util.CommonTests, unittest.TestCase):
def execute(self, package, path):
target = resources.files(package).joinpath(path)
with target.open('rb'):
pass
class CommonTextTests(util.CommonTests, unittest.TestCase):
def execute(self, package, path):
target = resources.files(package).joinpath(path)
with target.open():
pass
class OpenTests:
def test_open_binary(self):
target = resources.files(self.data) / 'binary.file'
with target.open('rb') as fp:
result = fp.read()
self.assertEqual(result, b'\x00\x01\x02\x03')
def test_open_text_default_encoding(self):
target = resources.files(self.data) / 'utf-8.file'
with target.open() as fp:
result = fp.read()
self.assertEqual(result, 'Hello, UTF-8 world!\n')
def test_open_text_given_encoding(self):
target = resources.files(self.data) / 'utf-16.file'
with target.open(encoding='utf-16', errors='strict') as fp:
result = fp.read()
self.assertEqual(result, 'Hello, UTF-16 world!\n')
def test_open_text_with_errors(self):
# Raises UnicodeError without the 'errors' argument.
target = resources.files(self.data) / 'utf-16.file'
with target.open(encoding='utf-8', errors='strict') as fp:
self.assertRaises(UnicodeError, fp.read)
with target.open(encoding='utf-8', errors='ignore') as fp:
result = fp.read()
self.assertEqual(
result,
'H\x00e\x00l\x00l\x00o\x00,\x00 '
'\x00U\x00T\x00F\x00-\x001\x006\x00 '
'\x00w\x00o\x00r\x00l\x00d\x00!\x00\n\x00',
)
def test_open_binary_FileNotFoundError(self):
target = resources.files(self.data) / 'does-not-exist'
self.assertRaises(FileNotFoundError, target.open, 'rb')
def test_open_text_FileNotFoundError(self):
target = resources.files(self.data) / 'does-not-exist'
self.assertRaises(FileNotFoundError, target.open)
class OpenDiskTests(OpenTests, unittest.TestCase):
def setUp(self):
self.data = data01
class OpenDiskNamespaceTests(OpenTests, unittest.TestCase):
def setUp(self):
from . import namespacedata01
self.data = namespacedata01
class OpenZipTests(OpenTests, util.ZipSetup, unittest.TestCase):
pass
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,64 @@
import io
import unittest
import importlib_resources as resources
from . import data01
from . import util
class CommonTests(util.CommonTests, unittest.TestCase):
def execute(self, package, path):
with resources.as_file(resources.files(package).joinpath(path)):
pass
class PathTests:
def test_reading(self):
# Path should be readable.
# Test also implicitly verifies the returned object is a pathlib.Path
# instance.
target = resources.files(self.data) / 'utf-8.file'
with resources.as_file(target) as path:
self.assertTrue(path.name.endswith("utf-8.file"), repr(path))
# pathlib.Path.read_text() was introduced in Python 3.5.
with path.open('r', encoding='utf-8') as file:
text = file.read()
self.assertEqual('Hello, UTF-8 world!\n', text)
class PathDiskTests(PathTests, unittest.TestCase):
data = data01
def test_natural_path(self):
"""
Guarantee the internal implementation detail that
file-system-backed resources do not get the tempdir
treatment.
"""
target = resources.files(self.data) / 'utf-8.file'
with resources.as_file(target) as path:
assert 'data' in str(path)
class PathMemoryTests(PathTests, unittest.TestCase):
def setUp(self):
file = io.BytesIO(b'Hello, UTF-8 world!\n')
self.addCleanup(file.close)
self.data = util.create_package(
file=file, path=FileNotFoundError("package exists only in memory")
)
self.data.__spec__.origin = None
self.data.__spec__.has_location = False
class PathZipTests(PathTests, util.ZipSetup, unittest.TestCase):
def test_remove_in_context_manager(self):
# It is not an error if the file that was temporarily stashed on the
# file system is removed inside the `with` stanza.
target = resources.files(self.data) / 'utf-8.file'
with resources.as_file(target) as path:
path.unlink()
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,76 @@
import unittest
import importlib_resources as resources
from . import data01
from . import util
from importlib import import_module
class CommonBinaryTests(util.CommonTests, unittest.TestCase):
def execute(self, package, path):
resources.files(package).joinpath(path).read_bytes()
class CommonTextTests(util.CommonTests, unittest.TestCase):
def execute(self, package, path):
resources.files(package).joinpath(path).read_text()
class ReadTests:
def test_read_bytes(self):
result = resources.files(self.data).joinpath('binary.file').read_bytes()
self.assertEqual(result, b'\0\1\2\3')
def test_read_text_default_encoding(self):
result = resources.files(self.data).joinpath('utf-8.file').read_text()
self.assertEqual(result, 'Hello, UTF-8 world!\n')
def test_read_text_given_encoding(self):
result = (
resources.files(self.data)
.joinpath('utf-16.file')
.read_text(encoding='utf-16')
)
self.assertEqual(result, 'Hello, UTF-16 world!\n')
def test_read_text_with_errors(self):
# Raises UnicodeError without the 'errors' argument.
target = resources.files(self.data) / 'utf-16.file'
self.assertRaises(UnicodeError, target.read_text, encoding='utf-8')
result = target.read_text(encoding='utf-8', errors='ignore')
self.assertEqual(
result,
'H\x00e\x00l\x00l\x00o\x00,\x00 '
'\x00U\x00T\x00F\x00-\x001\x006\x00 '
'\x00w\x00o\x00r\x00l\x00d\x00!\x00\n\x00',
)
class ReadDiskTests(ReadTests, unittest.TestCase):
data = data01
class ReadZipTests(ReadTests, util.ZipSetup, unittest.TestCase):
def test_read_submodule_resource(self):
submodule = import_module('ziptestdata.subdirectory')
result = resources.files(submodule).joinpath('binary.file').read_bytes()
self.assertEqual(result, b'\0\1\2\3')
def test_read_submodule_resource_by_name(self):
result = (
resources.files('ziptestdata.subdirectory')
.joinpath('binary.file')
.read_bytes()
)
self.assertEqual(result, b'\0\1\2\3')
class ReadNamespaceTests(ReadTests, unittest.TestCase):
def setUp(self):
from . import namespacedata01
self.data = namespacedata01
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,128 @@
import os.path
import sys
import pathlib
import unittest
from importlib import import_module
from importlib_resources.readers import MultiplexedPath, NamespaceReader
class MultiplexedPathTest(unittest.TestCase):
@classmethod
def setUpClass(cls):
path = pathlib.Path(__file__).parent / 'namespacedata01'
cls.folder = str(path)
def test_init_no_paths(self):
with self.assertRaises(FileNotFoundError):
MultiplexedPath()
def test_init_file(self):
with self.assertRaises(NotADirectoryError):
MultiplexedPath(os.path.join(self.folder, 'binary.file'))
def test_iterdir(self):
contents = {path.name for path in MultiplexedPath(self.folder).iterdir()}
try:
contents.remove('__pycache__')
except (KeyError, ValueError):
pass
self.assertEqual(contents, {'binary.file', 'utf-16.file', 'utf-8.file'})
def test_iterdir_duplicate(self):
data01 = os.path.abspath(os.path.join(__file__, '..', 'data01'))
contents = {
path.name for path in MultiplexedPath(self.folder, data01).iterdir()
}
for remove in ('__pycache__', '__init__.pyc'):
try:
contents.remove(remove)
except (KeyError, ValueError):
pass
self.assertEqual(
contents,
{'__init__.py', 'binary.file', 'subdirectory', 'utf-16.file', 'utf-8.file'},
)
def test_is_dir(self):
self.assertEqual(MultiplexedPath(self.folder).is_dir(), True)
def test_is_file(self):
self.assertEqual(MultiplexedPath(self.folder).is_file(), False)
def test_open_file(self):
path = MultiplexedPath(self.folder)
with self.assertRaises(FileNotFoundError):
path.read_bytes()
with self.assertRaises(FileNotFoundError):
path.read_text()
with self.assertRaises(FileNotFoundError):
path.open()
def test_join_path(self):
prefix = os.path.abspath(os.path.join(__file__, '..'))
data01 = os.path.join(prefix, 'data01')
path = MultiplexedPath(self.folder, data01)
self.assertEqual(
str(path.joinpath('binary.file'))[len(prefix) + 1 :],
os.path.join('namespacedata01', 'binary.file'),
)
self.assertEqual(
str(path.joinpath('subdirectory'))[len(prefix) + 1 :],
os.path.join('data01', 'subdirectory'),
)
self.assertEqual(
str(path.joinpath('imaginary'))[len(prefix) + 1 :],
os.path.join('namespacedata01', 'imaginary'),
)
def test_repr(self):
self.assertEqual(
repr(MultiplexedPath(self.folder)),
f"MultiplexedPath('{self.folder}')",
)
def test_name(self):
self.assertEqual(
MultiplexedPath(self.folder).name,
os.path.basename(self.folder),
)
class NamespaceReaderTest(unittest.TestCase):
site_dir = str(pathlib.Path(__file__).parent)
@classmethod
def setUpClass(cls):
sys.path.append(cls.site_dir)
@classmethod
def tearDownClass(cls):
sys.path.remove(cls.site_dir)
def test_init_error(self):
with self.assertRaises(ValueError):
NamespaceReader(['path1', 'path2'])
def test_resource_path(self):
namespacedata01 = import_module('namespacedata01')
reader = NamespaceReader(namespacedata01.__spec__.submodule_search_locations)
root = os.path.abspath(os.path.join(__file__, '..', 'namespacedata01'))
self.assertEqual(
reader.resource_path('binary.file'), os.path.join(root, 'binary.file')
)
self.assertEqual(
reader.resource_path('imaginary'), os.path.join(root, 'imaginary')
)
def test_files(self):
namespacedata01 = import_module('namespacedata01')
reader = NamespaceReader(namespacedata01.__spec__.submodule_search_locations)
root = os.path.abspath(os.path.join(__file__, '..', 'namespacedata01'))
self.assertIsInstance(reader.files(), MultiplexedPath)
self.assertEqual(repr(reader.files()), f"MultiplexedPath('{root}')")
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,252 @@
import sys
import unittest
import importlib_resources as resources
import uuid
import pathlib
from . import data01
from . import zipdata01, zipdata02
from . import util
from importlib import import_module
from ._compat import import_helper, unlink
class ResourceTests:
# Subclasses are expected to set the `data` attribute.
def test_is_file_exists(self):
target = resources.files(self.data) / 'binary.file'
self.assertTrue(target.is_file())
def test_is_file_missing(self):
target = resources.files(self.data) / 'not-a-file'
self.assertFalse(target.is_file())
def test_is_dir(self):
target = resources.files(self.data) / 'subdirectory'
self.assertFalse(target.is_file())
self.assertTrue(target.is_dir())
class ResourceDiskTests(ResourceTests, unittest.TestCase):
def setUp(self):
self.data = data01
class ResourceZipTests(ResourceTests, util.ZipSetup, unittest.TestCase):
pass
def names(traversable):
return {item.name for item in traversable.iterdir()}
class ResourceLoaderTests(unittest.TestCase):
def test_resource_contents(self):
package = util.create_package(
file=data01, path=data01.__file__, contents=['A', 'B', 'C']
)
self.assertEqual(names(resources.files(package)), {'A', 'B', 'C'})
def test_is_file(self):
package = util.create_package(
file=data01, path=data01.__file__, contents=['A', 'B', 'C', 'D/E', 'D/F']
)
self.assertTrue(resources.files(package).joinpath('B').is_file())
def test_is_dir(self):
package = util.create_package(
file=data01, path=data01.__file__, contents=['A', 'B', 'C', 'D/E', 'D/F']
)
self.assertTrue(resources.files(package).joinpath('D').is_dir())
def test_resource_missing(self):
package = util.create_package(
file=data01, path=data01.__file__, contents=['A', 'B', 'C', 'D/E', 'D/F']
)
self.assertFalse(resources.files(package).joinpath('Z').is_file())
class ResourceCornerCaseTests(unittest.TestCase):
def test_package_has_no_reader_fallback(self):
# Test odd ball packages which:
# 1. Do not have a ResourceReader as a loader
# 2. Are not on the file system
# 3. Are not in a zip file
module = util.create_package(
file=data01, path=data01.__file__, contents=['A', 'B', 'C']
)
# Give the module a dummy loader.
module.__loader__ = object()
# Give the module a dummy origin.
module.__file__ = '/path/which/shall/not/be/named'
module.__spec__.loader = module.__loader__
module.__spec__.origin = module.__file__
self.assertFalse(resources.files(module).joinpath('A').is_file())
class ResourceFromZipsTest01(util.ZipSetupBase, unittest.TestCase):
ZIP_MODULE = zipdata01 # type: ignore
def test_is_submodule_resource(self):
submodule = import_module('ziptestdata.subdirectory')
self.assertTrue(resources.files(submodule).joinpath('binary.file').is_file())
def test_read_submodule_resource_by_name(self):
self.assertTrue(
resources.files('ziptestdata.subdirectory')
.joinpath('binary.file')
.is_file()
)
def test_submodule_contents(self):
submodule = import_module('ziptestdata.subdirectory')
self.assertEqual(
names(resources.files(submodule)), {'__init__.py', 'binary.file'}
)
def test_submodule_contents_by_name(self):
self.assertEqual(
names(resources.files('ziptestdata.subdirectory')),
{'__init__.py', 'binary.file'},
)
class ResourceFromZipsTest02(util.ZipSetupBase, unittest.TestCase):
    ZIP_MODULE = zipdata02 # type: ignore
    def test_unrelated_contents(self):
        """
        Test that a zip with two unrelated subpackages returns
        distinct resources. Ref python/importlib_resources#44.
        """
        self.assertEqual(
            names(resources.files('ziptestdata.one')),
            {'__init__.py', 'resource1.txt'},
        )
        self.assertEqual(
            names(resources.files('ziptestdata.two')),
            {'__init__.py', 'resource2.txt'},
        )
class DeletingZipsTest(unittest.TestCase):
"""Having accessed resources in a zip file should not keep an open
reference to the zip.
"""
ZIP_MODULE = zipdata01
def setUp(self):
modules = import_helper.modules_setup()
self.addCleanup(import_helper.modules_cleanup, *modules)
data_path = pathlib.Path(self.ZIP_MODULE.__file__)
data_dir = data_path.parent
self.source_zip_path = data_dir / 'ziptestdata.zip'
self.zip_path = pathlib.Path(f'{uuid.uuid4()}.zip').absolute()
self.zip_path.write_bytes(self.source_zip_path.read_bytes())
sys.path.append(str(self.zip_path))
self.data = import_module('ziptestdata')
def tearDown(self):
try:
sys.path.remove(str(self.zip_path))
except ValueError:
pass
try:
del sys.path_importer_cache[str(self.zip_path)]
del sys.modules[self.data.__name__]
except KeyError:
pass
try:
unlink(self.zip_path)
except OSError:
# If the test fails, this will probably fail too
pass
def test_iterdir_does_not_keep_open(self):
c = [item.name for item in resources.files('ziptestdata').iterdir()]
self.zip_path.unlink()
del c
def test_is_file_does_not_keep_open(self):
c = resources.files('ziptestdata').joinpath('binary.file').is_file()
self.zip_path.unlink()
del c
def test_is_file_failure_does_not_keep_open(self):
c = resources.files('ziptestdata').joinpath('not-present').is_file()
self.zip_path.unlink()
del c
@unittest.skip("Desired but not supported.")
def test_as_file_does_not_keep_open(self): # pragma: no cover
c = resources.as_file(resources.files('ziptestdata') / 'binary.file')
self.zip_path.unlink()
del c
def test_entered_path_does_not_keep_open(self):
# This is what certifi does on import to make its bundle
# available for the process duration.
c = resources.as_file(
resources.files('ziptestdata') / 'binary.file'
).__enter__()
self.zip_path.unlink()
del c
def test_read_binary_does_not_keep_open(self):
c = resources.files('ziptestdata').joinpath('binary.file').read_bytes()
self.zip_path.unlink()
del c
def test_read_text_does_not_keep_open(self):
c = resources.files('ziptestdata').joinpath('utf-8.file').read_text()
self.zip_path.unlink()
del c
class ResourceFromNamespaceTest01(unittest.TestCase):
site_dir = str(pathlib.Path(__file__).parent)
@classmethod
def setUpClass(cls):
sys.path.append(cls.site_dir)
@classmethod
def tearDownClass(cls):
sys.path.remove(cls.site_dir)
def test_is_submodule_resource(self):
self.assertTrue(
resources.files(import_module('namespacedata01'))
.joinpath('binary.file')
.is_file()
)
def test_read_submodule_resource_by_name(self):
self.assertTrue(
resources.files('namespacedata01').joinpath('binary.file').is_file()
)
def test_submodule_contents(self):
contents = names(resources.files(import_module('namespacedata01')))
try:
contents.remove('__pycache__')
except KeyError:
pass
self.assertEqual(contents, {'binary.file', 'utf-8.file', 'utf-16.file'})
def test_submodule_contents_by_name(self):
contents = names(resources.files('namespacedata01'))
try:
contents.remove('__pycache__')
except KeyError:
pass
self.assertEqual(contents, {'binary.file', 'utf-8.file', 'utf-16.file'})
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,53 @@
"""
Generate the zip test data files.
Run to build the tests/zipdataNN/ziptestdata.zip files from
files in tests/dataNN.
Replaces the file with the working copy, but does not commit anything
to the source repo.
"""
import contextlib
import os
import pathlib
import zipfile
def main():
    """
    >>> from unittest import mock
    >>> monkeypatch = getfixture('monkeypatch')
    >>> monkeypatch.setattr(zipfile, 'ZipFile', mock.MagicMock())
    >>> print(); main() # print workaround for bpo-32509
    <BLANKLINE>
    ...data01... -> ziptestdata/...
    ...
    ...data02... -> ziptestdata/...
    ...
    """
    suffixes = '01', '02'
    # Materialize the map so generate() actually runs for each suffix.
    tuple(map(generate, suffixes))
def generate(suffix):
    """Rebuild zipdata{suffix}/ziptestdata.zip from the data{suffix} tree."""
    here = pathlib.Path(__file__).parent.relative_to(os.getcwd())
    target = here / f'zipdata{suffix}/ziptestdata.zip'
    with zipfile.ZipFile(target, 'w') as archive:
        for src, rel in walk(here / f'data{suffix}'):
            # Archive members use POSIX separators under a 'ziptestdata' root.
            dst = 'ziptestdata' / pathlib.PurePosixPath(rel.as_posix())
            print(src, '->', dst)
            archive.write(src, dst)
def walk(datapath):
    """Yield (absolute, relative) path pairs for every file under *datapath*,
    pruning __pycache__ directories from the traversal."""
    for dirpath, dirnames, filenames in os.walk(datapath):
        # Prune in place so os.walk does not descend into bytecode caches.
        # BUG FIX: list.remove raises ValueError (not KeyError) when the
        # element is absent, so suppress(KeyError) let the error escape for
        # any directory without a __pycache__ child.
        with contextlib.suppress(ValueError):
            dirnames.remove('__pycache__')
        for filename in filenames:
            res = pathlib.Path(dirpath) / filename
            rel = res.relative_to(datapath)
            yield res, rel
# Idiomatic entry-point guard (was: `__name__ == '__main__' and main()`,
# which abuses `and` short-circuiting as a statement).
if __name__ == '__main__':
    main()

View File

@ -0,0 +1,178 @@
import abc
import importlib
import io
import sys
import types
from pathlib import Path, PurePath
from . import data01
from . import zipdata01
from ..abc import ResourceReader
from ._compat import import_helper
from importlib.machinery import ModuleSpec
class Reader(ResourceReader):
    """Configurable fake ResourceReader: attributes (`file`, `path`,
    `_contents`) are injected as kwargs; an Exception instance stored in
    an attribute is raised when that attribute is used."""
    def __init__(self, **kwargs):
        vars(self).update(kwargs)
    def get_resource_reader(self, package):
        # Doubles as its own loader: hands itself back as the reader.
        return self
    def open_resource(self, path):
        self._path = path
        if isinstance(self.file, Exception):
            raise self.file
        return self.file
    def resource_path(self, path_):
        self._path = path_
        if isinstance(self.path, Exception):
            raise self.path
        return self.path
    def is_resource(self, path_):
        self._path = path_
        if isinstance(self.path, Exception):
            raise self.path
        def part(entry):
            return entry.split('/')
        # Only top-level (single-segment) entries count as resources.
        return any(
            len(parts) == 1 and parts[0] == path_ for parts in map(part, self._contents)
        )
    def contents(self):
        if isinstance(self.path, Exception):
            raise self.path
        yield from self._contents
def create_package_from_loader(loader, is_package=True):
    """Build a synthetic module named 'testingpackage' backed by *loader*."""
    pkg_name = 'testingpackage'
    synthetic = types.ModuleType(pkg_name)
    synthetic.__spec__ = ModuleSpec(
        pkg_name, loader, origin='does-not-exist', is_package=is_package
    )
    synthetic.__loader__ = loader
    return synthetic
def create_package(file=None, path=None, is_package=True, contents=()):
    """Create a testing package whose loader is a Reader configured with
    the given file, path, and contents."""
    fake_reader = Reader(file=file, path=path, _contents=contents)
    return create_package_from_loader(fake_reader, is_package)
class CommonTests(metaclass=abc.ABCMeta):
    """
    Tests shared by test_open, test_path, and test_read.
    """
    @abc.abstractmethod
    def execute(self, package, path):
        """
        Call the pertinent legacy API function (e.g. open_text, path)
        on package and path.
        """
    def test_package_name(self):
        # Passing in the package name should succeed.
        self.execute(data01.__name__, 'utf-8.file')
    def test_package_object(self):
        # Passing in the package itself should succeed.
        self.execute(data01, 'utf-8.file')
    def test_string_path(self):
        # Passing in a string for the path should succeed.
        path = 'utf-8.file'
        self.execute(data01, path)
    def test_pathlib_path(self):
        # Passing in a pathlib.PurePath object for the path should succeed.
        path = PurePath('utf-8.file')
        self.execute(data01, path)
    def test_importing_module_as_side_effect(self):
        # The anchor package can already be imported.
        del sys.modules[data01.__name__]
        self.execute(data01.__name__, 'utf-8.file')
    def test_non_package_by_name(self):
        # The anchor package cannot be a module.
        with self.assertRaises(TypeError):
            self.execute(__name__, 'utf-8.file')
    def test_non_package_by_package(self):
        # The anchor package cannot be a module.
        with self.assertRaises(TypeError):
            module = sys.modules['importlib_resources.tests.util']
            self.execute(module, 'utf-8.file')
    def test_missing_path(self):
        # Attempting to open or read or request the path for a
        # non-existent path should succeed if open_resource
        # can return a viable data stream.
        bytes_data = io.BytesIO(b'Hello, world!')
        package = create_package(file=bytes_data, path=FileNotFoundError())
        self.execute(package, 'utf-8.file')
        self.assertEqual(package.__loader__._path, 'utf-8.file')
    def test_extant_path(self):
        # Attempting to open or read or request the path when the
        # path does exist should still succeed. Does not assert
        # anything about the result.
        bytes_data = io.BytesIO(b'Hello, world!')
        # any path that exists
        path = __file__
        package = create_package(file=bytes_data, path=path)
        self.execute(package, 'utf-8.file')
        self.assertEqual(package.__loader__._path, 'utf-8.file')
    def test_useless_loader(self):
        # Both file and path raise: the API must surface FileNotFoundError.
        package = create_package(file=FileNotFoundError(), path=FileNotFoundError())
        with self.assertRaises(FileNotFoundError):
            self.execute(package, 'utf-8.file')
class ZipSetupBase:
    """Test mixin that makes a zip archive importable for the class's lifetime."""

    # Subclasses point this at a module that lives next to 'ziptestdata.zip'.
    ZIP_MODULE = None

    @classmethod
    def setUpClass(cls):
        # Locate the zip fixture beside ZIP_MODULE, put it on sys.path so the
        # zipimporter can find it, then import the packaged test data.
        data_path = Path(cls.ZIP_MODULE.__file__)
        data_dir = data_path.parent
        cls._zip_path = str(data_dir / 'ziptestdata.zip')
        sys.path.append(cls._zip_path)
        cls.data = importlib.import_module('ziptestdata')

    @classmethod
    def tearDownClass(cls):
        # Each cleanup is best-effort: setUpClass may have failed partway,
        # leaving any of these only partially initialized.
        try:
            sys.path.remove(cls._zip_path)
        except ValueError:
            pass

        try:
            # NOTE: grouped deliberately — if the importer-cache entry is
            # missing, the sys.modules entry is skipped too (KeyError path).
            del sys.path_importer_cache[cls._zip_path]
            del sys.modules[cls.data.__name__]
        except KeyError:
            pass

        try:
            del cls.data
            del cls._zip_path
        except AttributeError:
            pass

    def setUp(self):
        # Snapshot sys.modules so each test restores any import state it mutates.
        modules = import_helper.modules_setup()
        self.addCleanup(import_helper.modules_cleanup, *modules)
class ZipSetup(ZipSetupBase):
    """Concrete zip-import fixture bound to the first zip data package."""

    ZIP_MODULE = zipdata01  # type: ignore

View File

@ -19,7 +19,7 @@ flask-restful=0.3.8
flask-socketio=5.0.2dev
future=0.18.2
gitpython=2.1.9
guessit=3.3.1
guessit=3.4.3
guess_language-spirit=0.5.3
html5lib=1.1
Js2Py=0.63 <-- modified: manually merged from upstream: https://github.com/PiotrDabkowski/Js2Py/pull/192/files
@ -62,3 +62,6 @@ Pygments=2.8.1 # Required-by: rich
commonmark=0.9.1 # Required-by: rich
typing-extensions=3.7.4.3 # Required-by: rich
colorama=0.4.4 # Required-by: rich
importlib_resources=5.4.0 # Required-by: guessit
zipp=3.7.0 # Required-by: guessit

329
libs/zipp.py Normal file
View File

@ -0,0 +1,329 @@
import io
import posixpath
import zipfile
import itertools
import contextlib
import sys
import pathlib
# Insertion order of plain dicts is only guaranteed by the language from
# Python 3.7; _dedupe below relies on ordered keys, so fall back to
# collections.OrderedDict on older interpreters.
if sys.version_info < (3, 7):
    from collections import OrderedDict
else:
    OrderedDict = dict

__all__ = ['Path']
def _parents(path):
    """
    Given a path with elements separated by
    posixpath.sep, generate all parents of that path.

    >>> list(_parents('b/d'))
    ['b']
    >>> list(_parents('/b/d/'))
    ['/b']
    >>> list(_parents('b/d/f/'))
    ['b/d', 'b']
    >>> list(_parents('b'))
    []
    >>> list(_parents(''))
    []
    """
    ancestors = _ancestry(path)
    # The first ancestry element is the path itself, not a parent: skip it.
    next(ancestors, None)
    return ancestors
def _ancestry(path):
"""
Given a path with elements separated by
posixpath.sep, generate all elements of that path
>>> list(_ancestry('b/d'))
['b/d', 'b']
>>> list(_ancestry('/b/d/'))
['/b/d', '/b']
>>> list(_ancestry('b/d/f/'))
['b/d/f', 'b/d', 'b']
>>> list(_ancestry('b'))
['b']
>>> list(_ancestry(''))
[]
"""
path = path.rstrip(posixpath.sep)
while path and path != posixpath.sep:
yield path
path, tail = posixpath.split(path)
# fromkeys keeps only the first occurrence of each key, in insertion order,
# so applying it to an iterable is an order-preserving de-duplication.
_dedupe = OrderedDict.fromkeys
"""Deduplicate an iterable in original order"""
def _difference(minuend, subtrahend):
"""
Return items in minuend not in subtrahend, retaining order
with O(1) lookup.
"""
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
class CompleteDirs(zipfile.ZipFile):
    """
    A ZipFile subclass that ensures that implied directories
    are always included in the namelist.
    """

    @staticmethod
    def _implied_dirs(names):
        # Every ancestor of every member, rendered with a trailing slash,
        # minus entries already present, de-duplicated in original order.
        parents = itertools.chain.from_iterable(map(_parents, names))
        as_dirs = (p + posixpath.sep for p in parents)
        return _dedupe(_difference(as_dirs, names))

    def namelist(self):
        names = super(CompleteDirs, self).namelist()
        return names + list(self._implied_dirs(names))

    def _name_set(self):
        # Set view of namelist() for O(1) membership tests.
        return set(self.namelist())

    def resolve_dir(self, name):
        """
        If the name represents a directory, return that name
        as a directory (with the trailing slash).
        """
        names = self._name_set()
        dirname = name + '/'
        dir_match = name not in names and dirname in names
        return dirname if dir_match else name

    @classmethod
    def make(cls, source):
        """
        Given a source (filename or zipfile), return an
        appropriate CompleteDirs subclass.
        """
        if isinstance(source, CompleteDirs):
            return source

        if not isinstance(source, zipfile.ZipFile):
            # Source is a filename or path-like object: open it ourselves.
            return cls(_pathlib_compat(source))

        # Only allow for FastLookup when supplied zipfile is read-only
        if 'r' not in source.mode:
            cls = CompleteDirs

        # Re-brand the caller's ZipFile in place rather than copying it.
        source.__class__ = cls
        return source
class FastLookup(CompleteDirs):
    """
    ZipFile subclass to ensure implicit
    dirs exist and are resolved rapidly.
    """

    def namelist(self):
        # Memoize in a name-mangled attribute. CompleteDirs.make only applies
        # FastLookup to read-only archives, whose member list cannot change.
        with contextlib.suppress(AttributeError):
            return self.__names
        self.__names = super(FastLookup, self).namelist()
        return self.__names

    def _name_set(self):
        # Same memoization pattern for the membership set.
        with contextlib.suppress(AttributeError):
            return self.__lookup
        self.__lookup = super(FastLookup, self)._name_set()
        return self.__lookup
def _pathlib_compat(path):
"""
For path-like objects, convert to a filename for compatibility
on Python 3.6.1 and earlier.
"""
try:
return path.__fspath__()
except AttributeError:
return str(path)
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
         a.txt
         b
          c.txt
          d
           e.txt

    >>> data = io.BytesIO()
    >>> zf = zipfile.ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'mem/abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('mem/abcde.zip', 'a.txt')
    >>> b
    Path('mem/abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('mem/abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> import os
    >>> str(c).replace(os.sep, posixpath.sep)
    'mem/abcde.zip/b/c.txt'

    At the root, ``name``, ``filename``, and ``parent``
    resolve to the zipfile. Note these attributes are not
    valid and will raise a ``ValueError`` if the zipfile
    has no filename.

    >>> root.name
    'abcde.zip'
    >>> str(root.filename).replace(os.sep, posixpath.sep)
    'mem/abcde.zip'
    >>> str(root.parent)
    'mem'
    """

    # Format template consumed by __repr__ via str.format(self=self).
    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """
        Construct a Path from a ZipFile or filename.

        Note: When the source is an existing ZipFile object,
        its type (__class__) will be mutated to a
        specialized type. If the caller wishes to retain the
        original type, the caller should either create a
        separate ZipFile object or pass a filename.
        """
        self.root = FastLookup.make(root)
        # ``at`` is the posix-style location inside the archive; "" is the
        # root, and directory entries carry a trailing slash.
        self.at = at

    def open(self, mode='r', *args, pwd=None, **kwargs):
        """
        Open this entry as text or binary following the semantics
        of ``pathlib.Path.open()`` by passing arguments through
        to io.TextIOWrapper().
        """
        if self.is_dir():
            raise IsADirectoryError(self)
        # ZipFile.open only understands 'r'/'w': use the mode's first char.
        zip_mode = mode[0]
        if not self.exists() and zip_mode == 'r':
            raise FileNotFoundError(self)
        stream = self.root.open(self.at, zip_mode, pwd=pwd)
        if 'b' in mode:
            if args or kwargs:
                raise ValueError("encoding args invalid for binary operation")
            return stream
        # Text mode: wrap the raw binary stream, forwarding encoding args.
        return io.TextIOWrapper(stream, *args, **kwargs)

    @property
    def name(self):
        # At the root (``at`` is ""), fall back to the archive's own name.
        return pathlib.Path(self.at).name or self.filename.name

    @property
    def suffix(self):
        return pathlib.Path(self.at).suffix or self.filename.suffix

    @property
    def suffixes(self):
        return pathlib.Path(self.at).suffixes or self.filename.suffixes

    @property
    def stem(self):
        return pathlib.Path(self.at).stem or self.filename.stem

    @property
    def filename(self):
        # Host-filesystem path of the archive joined with the inner location.
        return pathlib.Path(self.root.filename).joinpath(self.at)

    def read_text(self, *args, **kwargs):
        with self.open('r', *args, **kwargs) as strm:
            return strm.read()

    def read_bytes(self):
        with self.open('rb') as strm:
            return strm.read()

    def _is_child(self, path):
        # True when ``path`` sits directly inside this directory.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # Sibling constructor: same archive, different inner location.
        return self.__class__(self.root, at)

    def is_dir(self):
        # Directories are the root ("") or entries ending in "/".
        return not self.at or self.at.endswith("/")

    def is_file(self):
        return self.exists() and not self.is_dir()

    def exists(self):
        return self.at in self.root._name_set()

    def iterdir(self):
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        # Wrap every member of the archive, then keep only direct children.
        subs = map(self._next, self.root.namelist())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def joinpath(self, *other):
        # NOTE(review): ``next`` shadows the builtin here; kept as-is.
        next = posixpath.join(self.at, *map(_pathlib_compat, other))
        # resolve_dir adds the trailing slash when the target is a directory.
        return self._next(self.root.resolve_dir(next))

    __truediv__ = joinpath

    @property
    def parent(self):
        if not self.at:
            # At the archive root, delegate to the filesystem parent.
            return self.filename.parent
        parent_at = posixpath.dirname(self.at.rstrip('/'))
        if parent_at:
            parent_at += '/'
        return self._next(parent_at)