mirror of https://github.com/morpheus65535/bazarr
Trying to fix Segmentation fault caused by mediainfo in docker container. #2098
This commit is contained in:
parent
7136383098
commit
7455496c4c
|
@ -1,10 +1,9 @@
|
||||||
"""Know your media files better."""
|
"""Know your media files better."""
|
||||||
__title__ = 'knowit'
|
__title__ = 'knowit'
|
||||||
__version__ = '0.4.0'
|
__version__ = '0.5.2'
|
||||||
__short_version__ = '.'.join(__version__.split('.')[:2])
|
__short_version__ = '0.5'
|
||||||
__author__ = 'Rato AQ2'
|
__author__ = 'Rato AQ2'
|
||||||
__license__ = 'MIT'
|
__license__ = 'MIT'
|
||||||
__copyright__ = 'Copyright 2016-2021, Rato AQ2'
|
|
||||||
__url__ = 'https://github.com/ratoaq2/knowit'
|
__url__ = 'https://github.com/ratoaq2/knowit'
|
||||||
|
|
||||||
#: Video extensions
|
#: Video extensions
|
||||||
|
|
|
@ -169,7 +169,7 @@ def dumps(
|
||||||
return convert(info, context)
|
return convert(info, context)
|
||||||
|
|
||||||
|
|
||||||
def main(args: typing.List[str] = None) -> None:
|
def main(args: typing.Optional[typing.List[str]] = None) -> None:
|
||||||
"""Execute main function for entry point."""
|
"""Execute main function for entry point."""
|
||||||
argument_parser = build_argument_parser()
|
argument_parser = build_argument_parser()
|
||||||
args = args or sys.argv[1:]
|
args = args or sys.argv[1:]
|
||||||
|
|
|
@ -65,7 +65,7 @@ def know(
|
||||||
raise KnowitException(debug_info(context=context, exc_info=True))
|
raise KnowitException(debug_info(context=context, exc_info=True))
|
||||||
|
|
||||||
|
|
||||||
def dependencies(context: typing.Mapping = None) -> typing.Mapping:
|
def dependencies(context: typing.Optional[typing.Mapping] = None) -> typing.Mapping:
|
||||||
"""Return all dependencies detected by knowit."""
|
"""Return all dependencies detected by knowit."""
|
||||||
deps = {}
|
deps = {}
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -63,6 +63,17 @@ class Property(Reportable[T]):
|
||||||
# Used to detect duplicated values. e.g.: en / en or High@L4.0 / High@L4.0 or Progressive / Progressive
|
# Used to detect duplicated values. e.g.: en / en or High@L4.0 / High@L4.0 or Progressive / Progressive
|
||||||
self.delimiter = delimiter
|
self.delimiter = delimiter
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _extract_value(cls,
|
||||||
|
track: typing.Mapping,
|
||||||
|
name: str,
|
||||||
|
names: typing.List[str]):
|
||||||
|
if len(names) == 2:
|
||||||
|
parent_value = track.get(names[0], track.get(names[0].upper(), {}))
|
||||||
|
return parent_value.get(names[1], parent_value.get(names[1].upper()))
|
||||||
|
|
||||||
|
return track.get(name, track.get(name.upper()))
|
||||||
|
|
||||||
def extract_value(
|
def extract_value(
|
||||||
self,
|
self,
|
||||||
track: typing.Mapping,
|
track: typing.Mapping,
|
||||||
|
@ -71,7 +82,7 @@ class Property(Reportable[T]):
|
||||||
"""Extract the property value from a given track."""
|
"""Extract the property value from a given track."""
|
||||||
for name in self.names:
|
for name in self.names:
|
||||||
names = name.split('.')
|
names = name.split('.')
|
||||||
value = track.get(names[0], {}).get(names[1]) if len(names) == 2 else track.get(name)
|
value = self._extract_value(track, name, names)
|
||||||
if value is None:
|
if value is None:
|
||||||
if self.default is None:
|
if self.default is None:
|
||||||
continue
|
continue
|
||||||
|
@ -216,9 +227,10 @@ class MultiValue(Property):
|
||||||
class Rule(Reportable[T]):
|
class Rule(Reportable[T]):
|
||||||
"""Rule abstract class."""
|
"""Rule abstract class."""
|
||||||
|
|
||||||
def __init__(self, name: str, override=False, **kwargs):
|
def __init__(self, name: str, private=False, override=False, **kwargs):
|
||||||
"""Initialize the object."""
|
"""Initialize the object."""
|
||||||
super().__init__(name, **kwargs)
|
super().__init__(name, **kwargs)
|
||||||
|
self.private = private
|
||||||
self.override = override
|
self.override = override
|
||||||
|
|
||||||
def execute(self, props, pv_props, context: typing.Mapping):
|
def execute(self, props, pv_props, context: typing.Mapping):
|
||||||
|
|
|
@ -455,46 +455,46 @@ profiles:
|
||||||
|
|
||||||
VideoProfileLevel:
|
VideoProfileLevel:
|
||||||
L1:
|
L1:
|
||||||
default: "1"
|
default: '1'
|
||||||
technical: Level 1
|
technical: Level 1
|
||||||
L11:
|
L11:
|
||||||
default: "1.1"
|
default: '1.1'
|
||||||
technical: Level 1.1
|
technical: Level 1.1
|
||||||
L13:
|
L13:
|
||||||
default: "1.3"
|
default: '1.3'
|
||||||
technical: Level 1.3
|
technical: Level 1.3
|
||||||
L2:
|
L2:
|
||||||
default: "2"
|
default: '2'
|
||||||
technical: Level 2
|
technical: Level 2
|
||||||
L21:
|
L21:
|
||||||
default: "2.1"
|
default: '2.1'
|
||||||
technical: Level 2.1
|
technical: Level 2.1
|
||||||
L22:
|
L22:
|
||||||
default: "2.2"
|
default: '2.2'
|
||||||
technical: Level 2.2
|
technical: Level 2.2
|
||||||
L3:
|
L3:
|
||||||
default: "3"
|
default: '3'
|
||||||
technical: Level 3
|
technical: Level 3
|
||||||
L31:
|
L31:
|
||||||
default: "3.1"
|
default: '3.1'
|
||||||
technical: Level 3.1
|
technical: Level 3.1
|
||||||
L32:
|
L32:
|
||||||
default: "3.2"
|
default: '3.2'
|
||||||
technical: Level 3.2
|
technical: Level 3.2
|
||||||
L4:
|
L4:
|
||||||
default: "4"
|
default: '4'
|
||||||
technical: Level 4
|
technical: Level 4
|
||||||
L41:
|
L41:
|
||||||
default: "4.1"
|
default: '4.1'
|
||||||
technical: Level 4.1
|
technical: Level 4.1
|
||||||
L42:
|
L42:
|
||||||
default: "4.2"
|
default: '4.2'
|
||||||
technical: Level 4.2
|
technical: Level 4.2
|
||||||
L5:
|
L5:
|
||||||
default: "5"
|
default: '5'
|
||||||
technical: Level 5
|
technical: Level 5
|
||||||
L51:
|
L51:
|
||||||
default: "5.1"
|
default: '5.1'
|
||||||
technical: Level 5.1
|
technical: Level 5.1
|
||||||
LOW:
|
LOW:
|
||||||
default: Low
|
default: Low
|
||||||
|
|
|
@ -106,11 +106,12 @@ class Ratio(Property[Decimal]):
|
||||||
if (width, height) == ('0', '1'): # identity
|
if (width, height) == ('0', '1'): # identity
|
||||||
return Decimal('1.0')
|
return Decimal('1.0')
|
||||||
|
|
||||||
result = round_decimal(Decimal(width) / Decimal(height), min_digits=1, max_digits=3)
|
if height:
|
||||||
if self.unit:
|
result = round_decimal(Decimal(width) / Decimal(height), min_digits=1, max_digits=3)
|
||||||
result *= self.unit
|
if self.unit:
|
||||||
|
result *= self.unit
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
self.report(value, context)
|
self.report(value, context)
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -103,10 +103,7 @@ class Provider:
|
||||||
|
|
||||||
value = prop.extract_value(track, context)
|
value = prop.extract_value(track, context)
|
||||||
if value is not None:
|
if value is not None:
|
||||||
if not prop.private:
|
which = props if not prop.private else pv_props
|
||||||
which = props
|
|
||||||
else:
|
|
||||||
which = pv_props
|
|
||||||
which[name] = value
|
which[name] = value
|
||||||
|
|
||||||
for name, rule in self.rules.get(track_type, {}).items():
|
for name, rule in self.rules.get(track_type, {}).items():
|
||||||
|
@ -116,8 +113,9 @@ class Provider:
|
||||||
|
|
||||||
value = rule.execute(props, pv_props, context)
|
value = rule.execute(props, pv_props, context)
|
||||||
if value is not None:
|
if value is not None:
|
||||||
props[name] = value
|
which = props if not rule.private else pv_props
|
||||||
elif name in props and not rule.override:
|
which[name] = value
|
||||||
|
elif name in props and (not rule.override or props[name] is None):
|
||||||
del props[name]
|
del props[name]
|
||||||
|
|
||||||
return props
|
return props
|
||||||
|
|
|
@ -26,6 +26,7 @@ from knowit.rules import (
|
||||||
LanguageRule,
|
LanguageRule,
|
||||||
ResolutionRule,
|
ResolutionRule,
|
||||||
)
|
)
|
||||||
|
from knowit.rules.general import GuessTitleRule
|
||||||
from knowit.serializer import get_json_encoder
|
from knowit.serializer import get_json_encoder
|
||||||
from knowit.units import units
|
from knowit.units import units
|
||||||
from knowit.utils import to_dict
|
from knowit.utils import to_dict
|
||||||
|
@ -83,17 +84,20 @@ class EnzymeProvider(Provider):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'video': {
|
'video': {
|
||||||
'language': LanguageRule('video language'),
|
'guessed': GuessTitleRule('guessed properties', private=True),
|
||||||
|
'language': LanguageRule('video language', override=True),
|
||||||
'resolution': ResolutionRule('video resolution'),
|
'resolution': ResolutionRule('video resolution'),
|
||||||
},
|
},
|
||||||
'audio': {
|
'audio': {
|
||||||
'language': LanguageRule('audio language'),
|
'guessed': GuessTitleRule('guessed properties', private=True),
|
||||||
|
'language': LanguageRule('audio language', override=True),
|
||||||
'channels': AudioChannelsRule('audio channels'),
|
'channels': AudioChannelsRule('audio channels'),
|
||||||
},
|
},
|
||||||
'subtitle': {
|
'subtitle': {
|
||||||
'language': LanguageRule('subtitle language'),
|
'guessed': GuessTitleRule('guessed properties', private=True),
|
||||||
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired'),
|
'language': LanguageRule('subtitle language', override=True),
|
||||||
'closed_caption': ClosedCaptionRule('closed caption'),
|
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
|
||||||
|
'closed_caption': ClosedCaptionRule('closed caption', override=True),
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -130,7 +134,8 @@ class EnzymeProvider(Provider):
|
||||||
|
|
||||||
if logger.level == logging.DEBUG:
|
if logger.level == logging.DEBUG:
|
||||||
logger.debug('Video {video_path} scanned using Enzyme {version} has raw data:\n{data}',
|
logger.debug('Video {video_path} scanned using Enzyme {version} has raw data:\n{data}',
|
||||||
video_path=video_path, version=enzyme.__version__, data=json.dumps(data))
|
video_path=video_path, version=enzyme.__version__,
|
||||||
|
data=json.dumps(data, cls=get_json_encoder(context), indent=4, ensure_ascii=False))
|
||||||
|
|
||||||
result = self._describe_tracks(video_path, data.get('info', {}), data.get('video_tracks'),
|
result = self._describe_tracks(video_path, data.get('info', {}), data.get('video_tracks'),
|
||||||
data.get('audio_tracks'), data.get('subtitle_tracks'), context)
|
data.get('audio_tracks'), data.get('subtitle_tracks'), context)
|
||||||
|
|
|
@ -34,6 +34,7 @@ from knowit.rules import (
|
||||||
LanguageRule,
|
LanguageRule,
|
||||||
ResolutionRule,
|
ResolutionRule,
|
||||||
)
|
)
|
||||||
|
from knowit.rules.general import GuessTitleRule
|
||||||
from knowit.serializer import get_json_encoder
|
from knowit.serializer import get_json_encoder
|
||||||
from knowit.units import units
|
from knowit.units import units
|
||||||
from knowit.utils import (
|
from knowit.utils import (
|
||||||
|
@ -77,7 +78,7 @@ class FFmpegExecutor:
|
||||||
def extract_info(self, filename):
|
def extract_info(self, filename):
|
||||||
"""Extract media info."""
|
"""Extract media info."""
|
||||||
json_dump = self._execute(filename)
|
json_dump = self._execute(filename)
|
||||||
return json.loads(json_dump)
|
return json.loads(json_dump) if json_dump else {}
|
||||||
|
|
||||||
def _execute(self, filename):
|
def _execute(self, filename):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
@ -144,7 +145,7 @@ class FFmpegProvider(Provider):
|
||||||
'id': Basic('index', data_type=int, allow_fallback=True, description='video track number'),
|
'id': Basic('index', data_type=int, allow_fallback=True, description='video track number'),
|
||||||
'name': Property('tags.title', description='video track name'),
|
'name': Property('tags.title', description='video track name'),
|
||||||
'language': Language('tags.language', description='video language'),
|
'language': Language('tags.language', description='video language'),
|
||||||
'duration': Duration('duration', description='video duration'),
|
'duration': Duration('duration', 'tags.duration', description='video duration'),
|
||||||
'width': Quantity('width', unit=units.pixel),
|
'width': Quantity('width', unit=units.pixel),
|
||||||
'height': Quantity('height', unit=units.pixel),
|
'height': Quantity('height', unit=units.pixel),
|
||||||
'scan_type': ScanType(config, 'field_order', default='Progressive', description='video scan type'),
|
'scan_type': ScanType(config, 'field_order', default='Progressive', description='video scan type'),
|
||||||
|
@ -153,7 +154,7 @@ class FFmpegProvider(Provider):
|
||||||
'resolution': None, # populated with ResolutionRule
|
'resolution': None, # populated with ResolutionRule
|
||||||
'frame_rate': Ratio('r_frame_rate', unit=units.FPS, description='video frame rate'),
|
'frame_rate': Ratio('r_frame_rate', unit=units.FPS, description='video frame rate'),
|
||||||
# frame_rate_mode
|
# frame_rate_mode
|
||||||
'bit_rate': Quantity('bit_rate', unit=units.bps, description='video bit rate'),
|
'bit_rate': Quantity('bit_rate', 'tags.bps', unit=units.bps, description='video bit rate'),
|
||||||
'bit_depth': Quantity('bits_per_raw_sample', unit=units.bit, description='video bit depth'),
|
'bit_depth': Quantity('bits_per_raw_sample', unit=units.bit, description='video bit depth'),
|
||||||
'codec': VideoCodec(config, 'codec_name', description='video codec'),
|
'codec': VideoCodec(config, 'codec_name', description='video codec'),
|
||||||
'profile': VideoProfile(config, 'profile', description='video codec profile'),
|
'profile': VideoProfile(config, 'profile', description='video codec profile'),
|
||||||
|
@ -166,13 +167,13 @@ class FFmpegProvider(Provider):
|
||||||
'id': Basic('index', data_type=int, allow_fallback=True, description='audio track number'),
|
'id': Basic('index', data_type=int, allow_fallback=True, description='audio track number'),
|
||||||
'name': Property('tags.title', description='audio track name'),
|
'name': Property('tags.title', description='audio track name'),
|
||||||
'language': Language('tags.language', description='audio language'),
|
'language': Language('tags.language', description='audio language'),
|
||||||
'duration': Duration('duration', description='audio duration'),
|
'duration': Duration('duration', 'tags.duration', description='audio duration'),
|
||||||
'codec': AudioCodec(config, 'profile', 'codec_name', description='audio codec'),
|
'codec': AudioCodec(config, 'profile', 'codec_name', description='audio codec'),
|
||||||
'profile': AudioProfile(config, 'profile', description='audio codec profile'),
|
'profile': AudioProfile(config, 'profile', description='audio codec profile'),
|
||||||
'channels_count': AudioChannels('channels', description='audio channels count'),
|
'channels_count': AudioChannels('channels', description='audio channels count'),
|
||||||
'channels': None, # populated with AudioChannelsRule
|
'channels': None, # populated with AudioChannelsRule
|
||||||
'bit_depth': Quantity('bits_per_raw_sample', unit=units.bit, description='audio bit depth'),
|
'bit_depth': Quantity('bits_per_raw_sample', unit=units.bit, description='audio bit depth'),
|
||||||
'bit_rate': Quantity('bit_rate', unit=units.bps, description='audio bit rate'),
|
'bit_rate': Quantity('bit_rate', 'tags.bps', unit=units.bps, description='audio bit rate'),
|
||||||
'sampling_rate': Quantity('sample_rate', unit=units.Hz, description='audio sampling rate'),
|
'sampling_rate': Quantity('sample_rate', unit=units.Hz, description='audio sampling rate'),
|
||||||
'forced': YesNo('disposition.forced', hide_value=False, description='audio track forced'),
|
'forced': YesNo('disposition.forced', hide_value=False, description='audio track forced'),
|
||||||
'default': YesNo('disposition.default', hide_value=False, description='audio track default'),
|
'default': YesNo('disposition.default', hide_value=False, description='audio track default'),
|
||||||
|
@ -190,17 +191,20 @@ class FFmpegProvider(Provider):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'video': {
|
'video': {
|
||||||
'language': LanguageRule('video language'),
|
'guessed': GuessTitleRule('guessed properties', private=True),
|
||||||
|
'language': LanguageRule('video language', override=True),
|
||||||
'resolution': ResolutionRule('video resolution'),
|
'resolution': ResolutionRule('video resolution'),
|
||||||
},
|
},
|
||||||
'audio': {
|
'audio': {
|
||||||
'language': LanguageRule('audio language'),
|
'guessed': GuessTitleRule('guessed properties', private=True),
|
||||||
|
'language': LanguageRule('audio language', override=True),
|
||||||
'channels': AudioChannelsRule('audio channels'),
|
'channels': AudioChannelsRule('audio channels'),
|
||||||
},
|
},
|
||||||
'subtitle': {
|
'subtitle': {
|
||||||
'language': LanguageRule('subtitle language'),
|
'guessed': GuessTitleRule('guessed properties', private=True),
|
||||||
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired'),
|
'language': LanguageRule('subtitle language', override=True),
|
||||||
'closed_caption': ClosedCaptionRule('closed caption'),
|
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
|
||||||
|
'closed_caption': ClosedCaptionRule('closed caption', override=True),
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
self.executor = FFmpegExecutor.get_executor_instance(suggested_path)
|
self.executor = FFmpegExecutor.get_executor_instance(suggested_path)
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
|
import ctypes
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
from ctypes import c_void_p, c_wchar_p
|
from ctypes import c_void_p, c_wchar_p
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
|
@ -43,6 +44,7 @@ from knowit.rules import (
|
||||||
LanguageRule,
|
LanguageRule,
|
||||||
ResolutionRule,
|
ResolutionRule,
|
||||||
)
|
)
|
||||||
|
from knowit.rules.general import GuessTitleRule
|
||||||
from knowit.units import units
|
from knowit.units import units
|
||||||
from knowit.utils import (
|
from knowit.utils import (
|
||||||
define_candidate,
|
define_candidate,
|
||||||
|
@ -77,7 +79,7 @@ class MediaInfoExecutor:
|
||||||
|
|
||||||
locations = {
|
locations = {
|
||||||
'unix': ('/usr/local/mediainfo/lib', '/usr/local/mediainfo/bin', '__PATH__'),
|
'unix': ('/usr/local/mediainfo/lib', '/usr/local/mediainfo/bin', '__PATH__'),
|
||||||
'windows': ('__PATH__', ),
|
'windows': ('C:\\Program Files\\MediaInfo', 'C:\\Program Files (x86)\\MediaInfo', '__PATH__'),
|
||||||
'macos': ('__PATH__', ),
|
'macos': ('__PATH__', ),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -121,12 +123,28 @@ class MediaInfoCliExecutor(MediaInfoExecutor):
|
||||||
}
|
}
|
||||||
|
|
||||||
def _execute(self, filename):
|
def _execute(self, filename):
|
||||||
return json.loads(check_output([self.location, '--Output=JSON', '--Full', filename]).decode())
|
data = check_output([self.location, '--Output=JSON', '--Full', filename]).decode()
|
||||||
|
|
||||||
|
return json.loads(data) if data else {}
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _is_gui_exe(cls, candidate: str):
|
||||||
|
if not candidate.endswith('MediaInfo.exe') or not os.path.isfile(candidate):
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
shell32 = ctypes.WinDLL('shell32', use_last_error=True) # type: ignore
|
||||||
|
return bool(shell32.ExtractIconExW(candidate, 0, None, None, 1))
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create(cls, os_family=None, suggested_path=None):
|
def create(cls, os_family=None, suggested_path=None):
|
||||||
"""Create the executor instance."""
|
"""Create the executor instance."""
|
||||||
for candidate in define_candidate(cls.locations, cls.names, os_family, suggested_path):
|
for candidate in define_candidate(cls.locations, cls.names, os_family, suggested_path):
|
||||||
|
if cls._is_gui_exe(candidate):
|
||||||
|
continue
|
||||||
|
|
||||||
try:
|
try:
|
||||||
output = check_output([candidate, '--version']).decode()
|
output = check_output([candidate, '--version']).decode()
|
||||||
version = cls._get_version(output)
|
version = cls._get_version(output)
|
||||||
|
@ -154,7 +172,9 @@ class MediaInfoCTypesExecutor(MediaInfoExecutor):
|
||||||
|
|
||||||
def _execute(self, filename):
|
def _execute(self, filename):
|
||||||
# Create a MediaInfo handle
|
# Create a MediaInfo handle
|
||||||
return json.loads(MediaInfo.parse(filename, library_file=self.location, output='JSON'))
|
data = MediaInfo.parse(filename, library_file=self.location, output='JSON')
|
||||||
|
|
||||||
|
return json.loads(data) if data else {}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create(cls, os_family=None, suggested_path=None):
|
def create(cls, os_family=None, suggested_path=None):
|
||||||
|
@ -254,19 +274,22 @@ class MediaInfoProvider(Provider):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'video': {
|
'video': {
|
||||||
'language': LanguageRule('video language'),
|
'guessed': GuessTitleRule('guessed properties', private=True),
|
||||||
|
'language': LanguageRule('video language', override=True),
|
||||||
'resolution': ResolutionRule('video resolution'),
|
'resolution': ResolutionRule('video resolution'),
|
||||||
},
|
},
|
||||||
'audio': {
|
'audio': {
|
||||||
'language': LanguageRule('audio language'),
|
'guessed': GuessTitleRule('guessed properties', private=True),
|
||||||
|
'language': LanguageRule('audio language', override=True),
|
||||||
'channels': AudioChannelsRule('audio channels'),
|
'channels': AudioChannelsRule('audio channels'),
|
||||||
'_atmosrule': AtmosRule(config, 'atmos rule'),
|
'atmos': AtmosRule(config, 'atmos rule', private=True),
|
||||||
'_dtshdrule': DtsHdRule(config, 'dts-hd rule'),
|
'dtshd': DtsHdRule(config, 'dts-hd rule', private=True),
|
||||||
},
|
},
|
||||||
'subtitle': {
|
'subtitle': {
|
||||||
'language': LanguageRule('subtitle language'),
|
'guessed': GuessTitleRule('guessed properties', private=True),
|
||||||
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired'),
|
'language': LanguageRule('subtitle language', override=True),
|
||||||
'closed_caption': ClosedCaptionRule('closed caption'),
|
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
|
||||||
|
'closed_caption': ClosedCaptionRule('closed caption', override=True),
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
self.executor = MediaInfoExecutor.get_executor_instance(suggested_path)
|
self.executor = MediaInfoExecutor.get_executor_instance(suggested_path)
|
||||||
|
|
|
@ -28,6 +28,7 @@ from knowit.rules import (
|
||||||
LanguageRule,
|
LanguageRule,
|
||||||
ResolutionRule,
|
ResolutionRule,
|
||||||
)
|
)
|
||||||
|
from knowit.rules.general import GuessTitleRule
|
||||||
from knowit.serializer import get_json_encoder
|
from knowit.serializer import get_json_encoder
|
||||||
from knowit.units import units
|
from knowit.units import units
|
||||||
from knowit.utils import define_candidate, detect_os
|
from knowit.utils import define_candidate, detect_os
|
||||||
|
@ -67,7 +68,7 @@ class MkvMergeExecutor:
|
||||||
def extract_info(self, filename):
|
def extract_info(self, filename):
|
||||||
"""Extract media info."""
|
"""Extract media info."""
|
||||||
json_dump = self._execute(filename)
|
json_dump = self._execute(filename)
|
||||||
return json.loads(json_dump)
|
return json.loads(json_dump) if json_dump else {}
|
||||||
|
|
||||||
def _execute(self, filename):
|
def _execute(self, filename):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
@ -166,17 +167,20 @@ class MkvMergeProvider(Provider):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'video': {
|
'video': {
|
||||||
|
'guessed': GuessTitleRule('guessed properties', private=True),
|
||||||
'language': LanguageRule('video language', override=True),
|
'language': LanguageRule('video language', override=True),
|
||||||
'resolution': ResolutionRule('video resolution'),
|
'resolution': ResolutionRule('video resolution'),
|
||||||
},
|
},
|
||||||
'audio': {
|
'audio': {
|
||||||
|
'guessed': GuessTitleRule('guessed properties', private=True),
|
||||||
'language': LanguageRule('audio language', override=True),
|
'language': LanguageRule('audio language', override=True),
|
||||||
'channels': AudioChannelsRule('audio channels'),
|
'channels': AudioChannelsRule('audio channels'),
|
||||||
},
|
},
|
||||||
'subtitle': {
|
'subtitle': {
|
||||||
|
'guessed': GuessTitleRule('guessed properties', private=True),
|
||||||
'language': LanguageRule('subtitle language', override=True),
|
'language': LanguageRule('subtitle language', override=True),
|
||||||
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired'),
|
'hearing_impaired': HearingImpairedRule('subtitle hearing impaired', override=True),
|
||||||
'closed_caption': ClosedCaptionRule('closed caption'),
|
'closed_caption': ClosedCaptionRule('closed caption', override=True),
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
self.executor = MkvMergeExecutor.get_executor_instance(suggested_path)
|
self.executor = MkvMergeExecutor.get_executor_instance(suggested_path)
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
|
|
||||||
import re
|
|
||||||
from logging import NullHandler, getLogger
|
from logging import NullHandler, getLogger
|
||||||
|
|
||||||
import babelfish
|
from trakit.api import trakit
|
||||||
|
|
||||||
from knowit.core import Rule
|
from knowit.core import Rule
|
||||||
|
|
||||||
|
@ -10,22 +8,27 @@ logger = getLogger(__name__)
|
||||||
logger.addHandler(NullHandler())
|
logger.addHandler(NullHandler())
|
||||||
|
|
||||||
|
|
||||||
class LanguageRule(Rule):
|
class GuessTitleRule(Rule):
|
||||||
"""Language rules."""
|
"""Guess properties from track title."""
|
||||||
|
|
||||||
name_re = re.compile(r'(?P<name>\w+)\b', re.IGNORECASE)
|
|
||||||
|
|
||||||
def execute(self, props, pv_props, context):
|
def execute(self, props, pv_props, context):
|
||||||
"""Language detection using name."""
|
"""Language detection using name."""
|
||||||
if 'language' in props:
|
if 'name' in props:
|
||||||
|
language = props.get('language')
|
||||||
|
options = {'expected_language': language} if language else {}
|
||||||
|
guessed = trakit(props['name'], options)
|
||||||
|
if guessed:
|
||||||
|
return guessed
|
||||||
|
|
||||||
|
|
||||||
|
class LanguageRule(Rule):
|
||||||
|
"""Language rules."""
|
||||||
|
|
||||||
|
def execute(self, props, pv_props, context):
|
||||||
|
"""Language detection using name."""
|
||||||
|
if 'guessed' not in pv_props:
|
||||||
return
|
return
|
||||||
|
|
||||||
if 'name' in props:
|
guess = pv_props['guessed']
|
||||||
name = props.get('name', '')
|
if 'language' in guess:
|
||||||
match = self.name_re.match(name)
|
return guess['language']
|
||||||
if match:
|
|
||||||
try:
|
|
||||||
return babelfish.Language.fromname(match.group('name'))
|
|
||||||
except babelfish.Error:
|
|
||||||
pass
|
|
||||||
logger.info('Invalid %s: %r', self.description, name)
|
|
||||||
|
|
|
@ -10,18 +10,19 @@ class ClosedCaptionRule(Rule):
|
||||||
|
|
||||||
def execute(self, props, pv_props, context):
|
def execute(self, props, pv_props, context):
|
||||||
"""Execute closed caption rule."""
|
"""Execute closed caption rule."""
|
||||||
for name in (pv_props.get('_closed_caption'), props.get('name')):
|
if '_closed_caption' in pv_props and self.cc_re.search(pv_props['_closed_caption']):
|
||||||
if name and self.cc_re.search(name):
|
return True
|
||||||
return True
|
|
||||||
|
if 'guessed' in pv_props:
|
||||||
|
guessed = pv_props['guessed']
|
||||||
|
return guessed.get('closed_caption')
|
||||||
|
|
||||||
|
|
||||||
class HearingImpairedRule(Rule):
|
class HearingImpairedRule(Rule):
|
||||||
"""Hearing Impaired rule."""
|
"""Hearing Impaired rule."""
|
||||||
|
|
||||||
hi_re = re.compile(r'(\bsdh\b)', re.IGNORECASE)
|
|
||||||
|
|
||||||
def execute(self, props, pv_props, context):
|
def execute(self, props, pv_props, context):
|
||||||
"""Hearing Impaired."""
|
"""Hearing Impaired."""
|
||||||
name = props.get('name')
|
if 'guessed' in pv_props:
|
||||||
if name and self.hi_re.search(name):
|
guessed = pv_props['guessed']
|
||||||
return True
|
return guessed.get('hearing_impaired')
|
||||||
|
|
|
@ -1,10 +1,5 @@
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
try:
|
|
||||||
import pint
|
|
||||||
except ImportError:
|
|
||||||
pint = False
|
|
||||||
|
|
||||||
|
|
||||||
class NullRegistry:
|
class NullRegistry:
|
||||||
"""A NullRegistry that masquerades as a pint.UnitRegistry."""
|
"""A NullRegistry that masquerades as a pint.UnitRegistry."""
|
||||||
|
@ -25,9 +20,18 @@ class NullRegistry:
|
||||||
|
|
||||||
|
|
||||||
def _build_unit_registry():
|
def _build_unit_registry():
|
||||||
registry = pint.UnitRegistry() if pint else NullRegistry()
|
try:
|
||||||
registry.define('FPS = 1 * hertz')
|
import pint
|
||||||
return registry
|
|
||||||
|
registry = pint.UnitRegistry()
|
||||||
|
registry.define('FPS = 1 * hertz')
|
||||||
|
|
||||||
|
pint.set_application_registry(registry)
|
||||||
|
return registry
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return NullRegistry()
|
||||||
|
|
||||||
|
|
||||||
units = _build_unit_registry()
|
units = _build_unit_registry()
|
||||||
|
|
|
@ -386,7 +386,7 @@ class MediaInfo:
|
||||||
A higher value will yield more precise results in some cases
|
A higher value will yield more precise results in some cases
|
||||||
but will also increase parsing time.
|
but will also increase parsing time.
|
||||||
:param bool full: display additional tags, including computer-readable values
|
:param bool full: display additional tags, including computer-readable values
|
||||||
for sizes and durations.
|
for sizes and durations, corresponds to the CLI's ``--Full``/``-f`` parameter.
|
||||||
:param bool legacy_stream_display: display additional information about streams.
|
:param bool legacy_stream_display: display additional information about streams.
|
||||||
:param dict mediainfo_options: additional options that will be passed to the
|
:param dict mediainfo_options: additional options that will be passed to the
|
||||||
`MediaInfo_Option` function, for example: ``{"Language": "raw"}``.
|
`MediaInfo_Option` function, for example: ``{"Language": "raw"}``.
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
__title__ = 'trakit'
|
||||||
|
__version__ = '0.2.1'
|
||||||
|
__short_version__ = '0.2'
|
||||||
|
__author__ = 'RatoAQ'
|
||||||
|
__license__ = 'MIT'
|
||||||
|
__url__ = 'https://github.com/ratoaq2/trakit'
|
||||||
|
|
||||||
|
from .api import TrakItApi, trakit
|
|
@ -0,0 +1,108 @@
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
import typing
|
||||||
|
|
||||||
|
import babelfish
|
||||||
|
|
||||||
|
from trakit import TrakItApi, __version__
|
||||||
|
|
||||||
|
logging.basicConfig(stream=sys.stdout, format='%(message)s')
|
||||||
|
logging.getLogger('CONSOLE').setLevel(logging.INFO)
|
||||||
|
logging.getLogger('trakit').setLevel(logging.WARNING)
|
||||||
|
|
||||||
|
console = logging.getLogger('CONSOLE')
|
||||||
|
logger = logging.getLogger('trakit')
|
||||||
|
|
||||||
|
|
||||||
|
def build_argument_parser() -> argparse.ArgumentParser:
|
||||||
|
"""Build the argument parser."""
|
||||||
|
opts = argparse.ArgumentParser()
|
||||||
|
opts.add_argument(
|
||||||
|
dest='value',
|
||||||
|
help='track title to guess',
|
||||||
|
type=str,
|
||||||
|
)
|
||||||
|
|
||||||
|
conf_opts = opts.add_argument_group('Configuration')
|
||||||
|
conf_opts.add_argument(
|
||||||
|
'-l',
|
||||||
|
'--expected-language',
|
||||||
|
dest='expected_language',
|
||||||
|
help='The expected language to be guessed',
|
||||||
|
type=str,
|
||||||
|
)
|
||||||
|
|
||||||
|
output_opts = opts.add_argument_group('Output')
|
||||||
|
output_opts.add_argument(
|
||||||
|
'--debug',
|
||||||
|
action='store_true',
|
||||||
|
dest='debug',
|
||||||
|
help='Print information for debugging trakit and for reporting bugs.'
|
||||||
|
)
|
||||||
|
output_opts.add_argument(
|
||||||
|
'-y',
|
||||||
|
'--yaml',
|
||||||
|
action='store_true',
|
||||||
|
dest='yaml',
|
||||||
|
help='Display output in yaml format'
|
||||||
|
)
|
||||||
|
|
||||||
|
information_opts = opts.add_argument_group('Information')
|
||||||
|
information_opts.add_argument('--version', action='version', version=__version__)
|
||||||
|
|
||||||
|
return opts
|
||||||
|
|
||||||
|
|
||||||
|
def _as_yaml(value: str, info: typing.Mapping[str, typing.Any]) -> str:
    """Render ``info`` as a YAML document with ``value`` as its single top-level key.

    ``babelfish.Language`` objects are emitted as plain YAML strings. The
    representer is registered on the ``SafeRepresenter`` class itself (not on
    an instance) every time this function is called.
    """
    import yaml

    def language_as_str(representer: yaml.representer.SafeRepresenter, data: typing.Any):
        # Serialize the object through ``str()`` as an ordinary YAML string.
        return representer.represent_scalar('tag:yaml.org,2002:str', str(data))

    safe_representer = yaml.representer.SafeRepresenter
    safe_representer.add_representer(babelfish.Language, language_as_str)

    document = {value: dict(info)}
    return yaml.safe_dump(document, allow_unicode=True, sort_keys=False)
|
||||||
|
|
||||||
|
|
||||||
|
def _as_json(info: typing.Mapping[str, typing.Any]) -> str:
|
||||||
|
"""Convert info to string using JSON format."""
|
||||||
|
return json.dumps(info, ensure_ascii=False, indent=2, default=str)
|
||||||
|
|
||||||
|
|
||||||
|
def dump(value: str, info: typing.Mapping[str, typing.Any], opts: argparse.Namespace) -> str:
    """Serialize ``info`` as YAML when ``opts.yaml`` is set, otherwise as JSON."""
    return _as_yaml(value, info) if opts.yaml else _as_json(info)
|
||||||
|
|
||||||
|
|
||||||
|
def trakit(value: str, opts: argparse.Namespace) -> typing.Mapping:
    """Guess the properties of ``value`` and log the result on the console.

    Every parsed command-line option whose value is not ``None`` is forwarded
    to the API as an option. The guess is also returned to the caller.
    """
    if not opts.yaml:
        console.info('Parsing: %s', value)

    # Forward only the options that were actually given a value.
    options = {}
    for name, setting in vars(opts).items():
        if setting is not None:
            options[name] = setting

    info = TrakItApi().trakit(value, options)
    console.info('TrakIt %s found: ', __version__)
    console.info(dump(value, info, opts))
    return info
|
||||||
|
|
||||||
|
|
||||||
|
def main(args: typing.Optional[typing.List[str]] = None):
    """Execute main function for entry point.

    :param args: command-line arguments, excluding the program name. When
        ``None``, ``sys.argv[1:]`` is used.
    :return: the mapping returned by :func:`trakit` for the parsed value.
    """
    argument_parser = build_argument_parser()
    # Only fall back to the process arguments when none were supplied at all;
    # ``args or sys.argv[1:]`` would also discard an explicitly empty list.
    if args is None:
        args = sys.argv[1:]
    opts = argument_parser.parse_args(args)

    if opts.debug:
        # Debug mode also enables debug output of the rebulk logger.
        logger.setLevel(logging.DEBUG)
        logging.getLogger('rebulk').setLevel(logging.DEBUG)

    return trakit(opts.value, opts)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the command-line entry point when this module is executed as a script.
if __name__ == '__main__':
    main(sys.argv[1:])
|
|
@ -0,0 +1,24 @@
|
||||||
|
import typing
|
||||||
|
|
||||||
|
from trakit.config import Config
|
||||||
|
from trakit.context import Context
|
||||||
|
from trakit.patterns import configure
|
||||||
|
|
||||||
|
|
||||||
|
class TrakItApi:
    """API object that guesses track properties from a string."""

    def __init__(self, config: typing.Optional[typing.Mapping[str, typing.Any]] = None):
        """Build the matcher returned by ``configure`` from ``Config(config)``."""
        settings = Config(config)
        self.rebulk = configure(settings)

    def trakit(self, string: str, options: typing.Optional[typing.Mapping[str, typing.Any]] = None):
        """Return a mapping of the information extracted from ``string``.

        ``options`` is wrapped in a ``Context`` and handed to the matcher.
        """
        context = Context(options)
        guess: typing.Mapping[str, typing.Any] = self.rebulk.matches(string, context).to_dict()
        return guess
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level API instance, created at import time and used by ``trakit`` below.
default_api = TrakItApi()


def trakit(string: str, options: typing.Optional[typing.Mapping[str, typing.Any]] = None):
    """Guess the properties of ``string`` using the shared ``default_api``.

    Thin wrapper around ``TrakItApi.trakit``; ``options`` is passed through
    unchanged.
    """
    return default_api.trakit(string, options)
|
|
@ -0,0 +1,19 @@
|
||||||
|
import json
|
||||||
|
import typing
|
||||||
|
|
||||||
|
from pkg_resources import resource_stream
|
||||||
|
|
||||||
|
|
||||||
|
class Config:
    """Settings loaded from the packaged ``data/config.json`` plus overrides."""

    def __init__(self, config: typing.Optional[typing.Mapping[str, typing.Any]]):
        """Load the packaged defaults and apply ``config`` on top of them.

        Top-level keys present in ``config`` replace the packaged ones
        (``dict.update`` semantics, no deep merge).
        """
        with resource_stream('trakit', 'data/config.json') as stream:
            settings: typing.Dict[str, typing.Any] = json.load(stream)
        if config:
            settings.update(config)

        def section(name: str) -> typing.Mapping[str, str]:
            # A missing section is treated as an empty mapping.
            return settings.get(name, {})

        self.ignored: typing.Set[str] = set(settings.get('ignored', []))
        self.countries: typing.Mapping[str, str] = section('countries')
        self.languages: typing.Mapping[str, str] = section('languages')
        self.scripts: typing.Mapping[str, str] = section('scripts')
        self.regions: typing.Mapping[str, str] = section('regions')
        self.implicit_languages: typing.Mapping[str, str] = section('implicit-languages')
|
|
@ -0,0 +1,22 @@
|
||||||
|
import typing
|
||||||
|
|
||||||
|
import babelfish
|
||||||
|
|
||||||
|
|
||||||
|
class Context(dict):
    """Guessing options, with the expected language exposed as an attribute.

    Behaves as a plain ``dict`` of options. The ``expected_language`` option,
    when present, is normalized to a ``babelfish.Language`` and stored on
    ``self.expected_language`` (the dict entry itself is left untouched).
    """

    def __init__(self, options: typing.Optional[typing.Mapping[str, typing.Any]] = None):
        """Copy ``options`` and resolve the optional ``expected_language`` entry.

        :param options: option mapping; ``None`` is treated as empty.
        """
        super().__init__(options or {})
        language = self.get('expected_language')
        if not isinstance(language, babelfish.Language):
            # Anything that is not already a Language is parsed as an IETF
            # tag; empty values (``None``, ``''``) mean "no expectation".
            language = babelfish.Language.fromietf(str(language)) if language else None
        self.expected_language: typing.Optional[babelfish.Language] = language

    def accept(self, lang: babelfish.Language):
        """Tell whether ``lang`` is compatible with the expected language.

        Everything is accepted when no language is expected. Otherwise the
        alpha3 codes must match and, when the expected language carries a
        script and/or a country, ``lang`` must carry the same one.

        :param lang: candidate language.
        :return: ``True`` when ``lang`` is acceptable, ``False`` otherwise.
        """
        expected = self.expected_language
        if expected is None:
            return True
        if expected.alpha3 != lang.alpha3:
            return False
        # Compare script with script and country with country. The previous
        # code compared the whole expected Language object against
        # ``lang.script`` / ``lang.country``.
        if expected.script and expected.script != lang.script:
            return False

        return not expected.country or expected.country == lang.country
|
|
@ -0,0 +1,32 @@
|
||||||
|
import typing
|
||||||
|
|
||||||
|
from babelfish import Country, CountryReverseConverter, CountryReverseError
|
||||||
|
from babelfish.converters import CaseInsensitiveDict
|
||||||
|
|
||||||
|
|
||||||
|
class GuessCountryConverter(CountryReverseConverter):
    """Country converter that resolves configured synonyms and country names."""

    def __init__(self, config: typing.Mapping[str, str]):
        """Store ``config`` (synonym -> country code) in a ``CaseInsensitiveDict``."""
        self.synonyms = CaseInsensitiveDict(config)

    def convert(self, alpha2):
        """Return ``str(Country(alpha2))`` for the given country code."""
        country = Country(alpha2)
        return str(country)

    def reverse(self, name: str):
        """Resolve ``name`` to a country code.

        Lookup order: configured synonyms, then a two-character upper-case
        code accepted by ``Country``, then ``Country.fromname``.

        :raises CountryReverseError: when none of the lookups succeeds.
        """
        try:
            return self.synonyms[name]
        except KeyError:
            pass  # not a configured synonym

        looks_like_code = len(name) == 2 and name.isupper()
        if looks_like_code:
            try:
                return Country(name).alpha2
            except ValueError:
                pass  # rejected by Country; fall through to the name lookup

        try:
            return Country.fromname(name).alpha2
        except CountryReverseError:
            pass

        raise CountryReverseError(name)
|
|
@ -0,0 +1,30 @@
|
||||||
|
import typing
|
||||||
|
|
||||||
|
from babelfish import Language, LanguageReverseConverter, LanguageReverseError
|
||||||
|
from babelfish.converters import CaseInsensitiveDict
|
||||||
|
|
||||||
|
|
||||||
|
class GuessLanguageConverter(LanguageReverseConverter):
    """Language converter that resolves configured synonyms and language names."""

    def __init__(self, config: typing.Mapping[str, str]):
        """Pre-compute an ``(alpha3, country, script)`` tuple for every synonym.

        Codes containing ``-`` are parsed with ``Language.fromietf``; all
        others are passed to the ``Language`` constructor.
        """
        self.synonyms = CaseInsensitiveDict()
        for synonym, code in config.items():
            if '-' in code:
                lang = Language.fromietf(code)
            else:
                lang = Language(code)
            # The country is stored by its alpha2 code, or None when absent.
            country_code = lang.country.alpha2 if lang.country else None
            self.synonyms[synonym] = (lang.alpha3, country_code, lang.script)

    def convert(self, alpha3: str, country=None, script=None):
        """Return ``str(Language(alpha3, country, script))``."""
        language = Language(alpha3, country, script)
        return str(language)

    def reverse(self, name: str):
        """Resolve ``name`` to an ``(alpha3, country, script)`` tuple.

        Configured synonyms are tried first, then ``Language.fromname``.

        :raises LanguageReverseError: when ``name`` cannot be resolved.
        """
        try:
            return self.synonyms[name]
        except KeyError:
            pass  # not a configured synonym

        try:
            found = Language.fromname(name)
            return found.alpha3, found.country, found.script
        except (ValueError, LanguageReverseError):
            pass

        raise LanguageReverseError(name)
|
|
@ -0,0 +1,860 @@
|
||||||
|
{
|
||||||
|
"countries": {
|
||||||
|
"Afghan": "AF",
|
||||||
|
"Aforika Borwa": "ZA",
|
||||||
|
"Afrika Borwa": "ZA",
|
||||||
|
"Afrika Dzonga": "ZA",
|
||||||
|
"Afurika Tshipembe": "ZA",
|
||||||
|
"Aland": "AX",
|
||||||
|
"Alandish": "AX",
|
||||||
|
"Albanian": "AL",
|
||||||
|
"Algerian": "DZ",
|
||||||
|
"American": "US",
|
||||||
|
"American Islander": "UM",
|
||||||
|
"American Samoan": "AS",
|
||||||
|
"American Virgin Islander": "VI",
|
||||||
|
"Andorran": "AD",
|
||||||
|
"Angolan": "AO",
|
||||||
|
"Anguillian": "AI",
|
||||||
|
"Antarctican": "AQ",
|
||||||
|
"Antiguan Barbudan": "AG",
|
||||||
|
"Ao Men": "MO",
|
||||||
|
"Aotearoa": "NZ",
|
||||||
|
"Argentine": "AR",
|
||||||
|
"Armenian": "AM",
|
||||||
|
"Aruban": "AW",
|
||||||
|
"Australian": "AU",
|
||||||
|
"Austrian": "AT",
|
||||||
|
"Ayiti": "HT",
|
||||||
|
"Azerbaidzhan": "AZ",
|
||||||
|
"Azerbaijani": "AZ",
|
||||||
|
"Azərbaycan": "AZ",
|
||||||
|
"Bahamian": "BS",
|
||||||
|
"Bahraini": "BH",
|
||||||
|
"Bangladeshi": "BD",
|
||||||
|
"Barbadian": "BB",
|
||||||
|
"Beafrika": "CF",
|
||||||
|
"Belarusian": "BY",
|
||||||
|
"Belau": "PW",
|
||||||
|
"Belgian": "BE",
|
||||||
|
"Belgie": "BE",
|
||||||
|
"Belgien": "BE",
|
||||||
|
"Belgique": "BE",
|
||||||
|
"België": "BE",
|
||||||
|
"Belice": "BZ",
|
||||||
|
"Belizean": "BZ",
|
||||||
|
"Beninese": "BJ",
|
||||||
|
"Bermudian": "BM",
|
||||||
|
"Bhutanese": "BT",
|
||||||
|
"Blgariia": "BG",
|
||||||
|
"Bolivia": "BO",
|
||||||
|
"Bolivian": "BO",
|
||||||
|
"Boneiru Sint Eustatius y Saba": "BQ",
|
||||||
|
"Bosna i Hercegovina": "BA",
|
||||||
|
"Bosna i Khertsegovina": "BA",
|
||||||
|
"Bosnian Herzegovinian": "BA",
|
||||||
|
"Bouvetoya": "BV",
|
||||||
|
"Bouvetøya": "BV",
|
||||||
|
"Brasil": "BR",
|
||||||
|
"Brazilian": "BR",
|
||||||
|
"British": "GB",
|
||||||
|
"British Virgin Islander": "VG",
|
||||||
|
"British Virgin Islands": "VG",
|
||||||
|
"Bruneian": "BN",
|
||||||
|
"Bulgarian": "BG",
|
||||||
|
"Buliwya": "BO",
|
||||||
|
"Burkinabe": "BF",
|
||||||
|
"Burmese": "MM",
|
||||||
|
"Burundian": "BI",
|
||||||
|
"Bénin": "BJ",
|
||||||
|
"Bêafrîka": "CF",
|
||||||
|
"Cabo Verde": "CV",
|
||||||
|
"Cambodian": "KH",
|
||||||
|
"Cameroonian": "CM",
|
||||||
|
"Cameroun": "CM",
|
||||||
|
"Canadian": "CA",
|
||||||
|
"Cape Verdian": "CV",
|
||||||
|
"Caribisch Nederland": "BQ",
|
||||||
|
"Caymanian": "KY",
|
||||||
|
"Central African": "CF",
|
||||||
|
"Cesko": "CZ",
|
||||||
|
"Chadian": "TD",
|
||||||
|
"Channel Islander": "JE",
|
||||||
|
"Chilean": "CL",
|
||||||
|
"Chinese": "CN",
|
||||||
|
"Christmas Islander": "CX",
|
||||||
|
"Cocos Islander": "CC",
|
||||||
|
"Cocos Keeling Islands": "CC",
|
||||||
|
"Colombian": "CO",
|
||||||
|
"Comoran": "KM",
|
||||||
|
"Comores": "KM",
|
||||||
|
"Congolese": "CD",
|
||||||
|
"Cook Islander": "CK",
|
||||||
|
"Costa Rican": "CR",
|
||||||
|
"Cote dIvoire": "CI",
|
||||||
|
"Croatian": "HR",
|
||||||
|
"Cuban": "CU",
|
||||||
|
"Curacao": "CW",
|
||||||
|
"Curacaoan": "CW",
|
||||||
|
"Curaçaoan": "CW",
|
||||||
|
"Cypriot": "CY",
|
||||||
|
"Czech": "CZ",
|
||||||
|
"Côte dIvoire": "CI",
|
||||||
|
"Danish": "DK",
|
||||||
|
"Danmark": "DK",
|
||||||
|
"Deutschland": "DE",
|
||||||
|
"Dgernesiais": "GG",
|
||||||
|
"Dgèrnésiais": "GG",
|
||||||
|
"Ditunga dia Kongu wa Mungalaata": "CD",
|
||||||
|
"Dominican": "DO",
|
||||||
|
"Dutch": "NL",
|
||||||
|
"East Timorese": "TL",
|
||||||
|
"Ecuadorean": "EC",
|
||||||
|
"Eesti": "EE",
|
||||||
|
"Egyptian": "EG",
|
||||||
|
"Eire": "IE",
|
||||||
|
"Ellada": "GR",
|
||||||
|
"Emirati": "AE",
|
||||||
|
"Equatorial Guinean": "GQ",
|
||||||
|
"Eritrean": "ER",
|
||||||
|
"Espana": "ES",
|
||||||
|
"España": "ES",
|
||||||
|
"Estados Unidos": "US",
|
||||||
|
"Estonian": "EE",
|
||||||
|
"Eswatini": "SZ",
|
||||||
|
"Ethiopian": "ET",
|
||||||
|
"Faereyjar": "FO",
|
||||||
|
"Faeroerne": "FO",
|
||||||
|
"Falkland Islander": "FK",
|
||||||
|
"Falkland Islands": "FK",
|
||||||
|
"Faroese": "FO",
|
||||||
|
"Fijian": "FJ",
|
||||||
|
"Filipino": "PH",
|
||||||
|
"Finnish": "FI",
|
||||||
|
"Foroyar": "FO",
|
||||||
|
"French": "FR",
|
||||||
|
"French Polynesian": "PF",
|
||||||
|
"Færeyjar": "FO",
|
||||||
|
"Færøerne": "FO",
|
||||||
|
"Føroyar": "FO",
|
||||||
|
"Gabonese": "GA",
|
||||||
|
"Gambian": "GM",
|
||||||
|
"Georgian": "GE",
|
||||||
|
"German": "DE",
|
||||||
|
"Ghanaian": "GH",
|
||||||
|
"Greek": "GR",
|
||||||
|
"Greenlandic": "GL",
|
||||||
|
"Grenadian": "GD",
|
||||||
|
"Guadeloupian": "GP",
|
||||||
|
"Guahan": "GU",
|
||||||
|
"Guamanian": "GU",
|
||||||
|
"Guatemalan": "GT",
|
||||||
|
"Guernesey": "GG",
|
||||||
|
"Guianan": "GF",
|
||||||
|
"Guine Bissau": "GW",
|
||||||
|
"Guine Equatorial": "GQ",
|
||||||
|
"Guinea Bissauan": "GW",
|
||||||
|
"Guinea Ecuatorial": "GQ",
|
||||||
|
"Guinean": "GN",
|
||||||
|
"Guinee": "GN",
|
||||||
|
"Guinee equatoriale": "GQ",
|
||||||
|
"Guiné Bissau": "GW",
|
||||||
|
"Guiné Equatorial": "GQ",
|
||||||
|
"Guinée": "GN",
|
||||||
|
"Guinée équatoriale": "GQ",
|
||||||
|
"Guyane francaise": "GF",
|
||||||
|
"Guyane française": "GF",
|
||||||
|
"Guyanese": "GY",
|
||||||
|
"Guåhån": "GU",
|
||||||
|
"Haitian": "HT",
|
||||||
|
"Hayastan": "AM",
|
||||||
|
"Haïti": "HT",
|
||||||
|
"Heard and McDonald Islander": "HM",
|
||||||
|
"Honduran": "HN",
|
||||||
|
"Hong Konger": "HK",
|
||||||
|
"Hrvatska": "HR",
|
||||||
|
"Hungarian": "HU",
|
||||||
|
"I Kiribati": "KI",
|
||||||
|
"Icelander": "IS",
|
||||||
|
"Indian": "IN",
|
||||||
|
"Indonesian": "ID",
|
||||||
|
"Iranian": "IR",
|
||||||
|
"Iraqi": "IQ",
|
||||||
|
"Irish": "IE",
|
||||||
|
"Island": "IS",
|
||||||
|
"Israeli": "IL",
|
||||||
|
"Italia": "IT",
|
||||||
|
"Italian": "IT",
|
||||||
|
"Ivorian": "CI",
|
||||||
|
"Jamaican": "JM",
|
||||||
|
"Jamhuri ya Kidemokrasia ya Kongo": "CD",
|
||||||
|
"Japanese": "JP",
|
||||||
|
"Jerri": "JE",
|
||||||
|
"Jordanian": "JO",
|
||||||
|
"Jèrri": "JE",
|
||||||
|
"Kalaallit Nunaat": "GL",
|
||||||
|
"Kampuchea": "KH",
|
||||||
|
"Kazakhstani": "KZ",
|
||||||
|
"Kazakstan": "KZ",
|
||||||
|
"Kenyan": "KE",
|
||||||
|
"Kibris": "CY",
|
||||||
|
"Kirghiz": "KG",
|
||||||
|
"Kirgiziia": "KG",
|
||||||
|
"Kittitian or Nevisian": "KN",
|
||||||
|
"Komori": "KM",
|
||||||
|
"Kuki Airani": "CK",
|
||||||
|
"Kupros": "CY",
|
||||||
|
"Kuwaiti": "KW",
|
||||||
|
"Kâmpŭchéa": "KH",
|
||||||
|
"Kıbrıs": "CY",
|
||||||
|
"Kūki Āirani": "CK",
|
||||||
|
"La Reunion": "RE",
|
||||||
|
"La Réunion": "RE",
|
||||||
|
"Laotian": "LA",
|
||||||
|
"Latvian": "LV",
|
||||||
|
"Latvija": "LV",
|
||||||
|
"Lebanese": "LB",
|
||||||
|
"Letzebuerg": "LU",
|
||||||
|
"Liban": "LB",
|
||||||
|
"Liberian": "LR",
|
||||||
|
"Libyan": "LY",
|
||||||
|
"Liechtensteiner": "LI",
|
||||||
|
"Lietuva": "LT",
|
||||||
|
"Lithuanian": "LT",
|
||||||
|
"Luxembourger": "LU",
|
||||||
|
"Luxemburg": "LU",
|
||||||
|
"Lëtzebuerg": "LU",
|
||||||
|
"Macanese": "MO",
|
||||||
|
"Macau": "MO",
|
||||||
|
"Macedonian": "MK",
|
||||||
|
"Madagasikara": "MG",
|
||||||
|
"Magyarorszag": "HU",
|
||||||
|
"Magyarország": "HU",
|
||||||
|
"Mahoran": "YT",
|
||||||
|
"Majel": "MH",
|
||||||
|
"Makedonija": "MK",
|
||||||
|
"Makedonski": "MK",
|
||||||
|
"Malagasy": "MG",
|
||||||
|
"Malawian": "MW",
|
||||||
|
"Malaysian": "MY",
|
||||||
|
"Malaŵi": "MW",
|
||||||
|
"Maldivan": "MV",
|
||||||
|
"Malian": "ML",
|
||||||
|
"Maltese": "MT",
|
||||||
|
"Mannin": "IM",
|
||||||
|
"Manx": "IM",
|
||||||
|
"Marshallese": "MH",
|
||||||
|
"Martinican": "MQ",
|
||||||
|
"Maurice": "MU",
|
||||||
|
"Mauritanian": "MR",
|
||||||
|
"Mauritian": "MU",
|
||||||
|
"Mexican": "MX",
|
||||||
|
"Micronesia": "FM",
|
||||||
|
"Micronesian": "FM",
|
||||||
|
"Mocambique": "MZ",
|
||||||
|
"Moldova": "MD",
|
||||||
|
"Moldovan": "MD",
|
||||||
|
"Monegasque": "MC",
|
||||||
|
"Mongol uls": "MN",
|
||||||
|
"Mongolian": "MN",
|
||||||
|
"Montenegrin": "ME",
|
||||||
|
"Montserratian": "MS",
|
||||||
|
"Moris": "MU",
|
||||||
|
"Moroccan": "MA",
|
||||||
|
"Mosotho": "LS",
|
||||||
|
"Motswana": "BW",
|
||||||
|
"Mozambican": "MZ",
|
||||||
|
"Moçambique": "MZ",
|
||||||
|
"Mzantsi Afrika": "ZA",
|
||||||
|
"México": "MX",
|
||||||
|
"M̧ajeļ": "MH",
|
||||||
|
"Na Islas Marianas": "MP",
|
||||||
|
"Na Islas Mariånas": "MP",
|
||||||
|
"Namibian": "NA",
|
||||||
|
"Namibie": "NA",
|
||||||
|
"Namibië": "NA",
|
||||||
|
"Nauruan": "NR",
|
||||||
|
"Nederland": "NL",
|
||||||
|
"Negara Brunei Darussalam": "BN",
|
||||||
|
"Nepalese": "NP",
|
||||||
|
"New Caledonian": "NC",
|
||||||
|
"New Zealander": "NZ",
|
||||||
|
"Ni Vanuatu": "VU",
|
||||||
|
"Nicaraguan": "NI",
|
||||||
|
"Nigerian": "NG",
|
||||||
|
"Nigerien": "NE",
|
||||||
|
"Ningizimu Afrika": "ZA",
|
||||||
|
"Niuean": "NU",
|
||||||
|
"Niuē": "NU",
|
||||||
|
"Noreg": "NO",
|
||||||
|
"Norfk Ailen": "NF",
|
||||||
|
"Norfolk Islander": "NF",
|
||||||
|
"Norge": "NO",
|
||||||
|
"Norgga": "NO",
|
||||||
|
"North Korean": "KP",
|
||||||
|
"Norwegian": "NO",
|
||||||
|
"Nouvelle Caledonie": "NC",
|
||||||
|
"Nouvelle Calédonie": "NC",
|
||||||
|
"Omani": "OM",
|
||||||
|
"Osterreich": "AT",
|
||||||
|
"Owganystan": "AF",
|
||||||
|
"Ozbekiston": "UZ",
|
||||||
|
"O‘zbekiston": "UZ",
|
||||||
|
"Pais Korsou": "CW",
|
||||||
|
"Pais Kòrsou": "CW",
|
||||||
|
"Pakistani": "PK",
|
||||||
|
"Palauan": "PW",
|
||||||
|
"Palestinian": "PS",
|
||||||
|
"Panamanian": "PA",
|
||||||
|
"Panamá": "PA",
|
||||||
|
"Papua New Guinean": "PG",
|
||||||
|
"Papua Niu Gini": "PG",
|
||||||
|
"Papua Niugini": "PG",
|
||||||
|
"Paraguai": "PY",
|
||||||
|
"Paraguayan": "PY",
|
||||||
|
"Paraguái": "PY",
|
||||||
|
"Peruvian": "PE",
|
||||||
|
"Perú": "PE",
|
||||||
|
"Pilipinas": "PH",
|
||||||
|
"Piruw": "PE",
|
||||||
|
"Pitcairn Islander": "PN",
|
||||||
|
"Pitcairn Islands": "PN",
|
||||||
|
"Polish": "PL",
|
||||||
|
"Polska": "PL",
|
||||||
|
"Polynesie francaise": "PF",
|
||||||
|
"Polynésie française": "PF",
|
||||||
|
"Portuguese": "PT",
|
||||||
|
"Puerto Rican": "PR",
|
||||||
|
"Qatari": "QA",
|
||||||
|
"RD Congo": "CD",
|
||||||
|
"Repubilika ya Kongo": "CG",
|
||||||
|
"Repubilika ya Kongo Demokratiki": "CD",
|
||||||
|
"Republica Dominicana": "DO",
|
||||||
|
"Republiki ya Kongo": "CG",
|
||||||
|
"Republiki ya Kongo Demokratiki": "CD",
|
||||||
|
"Republiki ya Kongó Demokratiki": "CD",
|
||||||
|
"Republique centrafricaine": "CF",
|
||||||
|
"Republique du Congo": "CG",
|
||||||
|
"Republíki ya Kongó": "CG",
|
||||||
|
"República Dominicana": "DO",
|
||||||
|
"Reunionese": "RE",
|
||||||
|
"Ri Ben": "JP",
|
||||||
|
"Romanian": "RO",
|
||||||
|
"România": "RO",
|
||||||
|
"Rossiia": "RU",
|
||||||
|
"Russian": "RU",
|
||||||
|
"Rwandan": "RW",
|
||||||
|
"République centrafricaine": "CF",
|
||||||
|
"République du Congo": "CG",
|
||||||
|
"Réunionese": "RE",
|
||||||
|
"Sahara Occidental": "EH",
|
||||||
|
"Sahrawi": "EH",
|
||||||
|
"Saint Barthelemy": "BL",
|
||||||
|
"Saint Barthelemy Islander": "BL",
|
||||||
|
"Saint Barthélemy Islander": "BL",
|
||||||
|
"Saint Helena Ascension and Tristan da Cunha": "SH",
|
||||||
|
"Saint Helenian": "SH",
|
||||||
|
"Saint Lucian": "LC",
|
||||||
|
"Saint Martin": "MF",
|
||||||
|
"Saint Martin Islander": "MF",
|
||||||
|
"Saint Pierrais Miquelonnais": "PM",
|
||||||
|
"Saint Pierre et Miquelon": "PM",
|
||||||
|
"Saint Vincentian": "VC",
|
||||||
|
"Salvadoran": "SV",
|
||||||
|
"Sammarinese": "SM",
|
||||||
|
"Samoa Amelika": "AS",
|
||||||
|
"Samoan": "WS",
|
||||||
|
"Sao Tome e Principe": "ST",
|
||||||
|
"Sao Tomean": "ST",
|
||||||
|
"Saudi Arabian": "SA",
|
||||||
|
"Schweiz": "CH",
|
||||||
|
"Senegalese": "SN",
|
||||||
|
"Serbian": "RS",
|
||||||
|
"Sesel": "SC",
|
||||||
|
"Sewula Afrika": "ZA",
|
||||||
|
"Seychellois": "SC",
|
||||||
|
"Shqiperia": "AL",
|
||||||
|
"Shqipëria": "AL",
|
||||||
|
"Sierra Leonean": "SL",
|
||||||
|
"Singaporean": "SG",
|
||||||
|
"Singapura": "SG",
|
||||||
|
"Sint Maarten": "SX",
|
||||||
|
"Slovak": "SK",
|
||||||
|
"Slovene": "SI",
|
||||||
|
"Slovenija": "SI",
|
||||||
|
"Slovensko": "SK",
|
||||||
|
"Solomon Islander": "SB",
|
||||||
|
"Somali": "SO",
|
||||||
|
"Soomaaliya": "SO",
|
||||||
|
"South African": "ZA",
|
||||||
|
"South Georgia": "GS",
|
||||||
|
"South Georgian South Sandwich Islander": "GS",
|
||||||
|
"South Korean": "KR",
|
||||||
|
"South Sudanese": "SS",
|
||||||
|
"Spanish": "ES",
|
||||||
|
"Srbija": "RS",
|
||||||
|
"Sri Lankan": "LK",
|
||||||
|
"St Maartener": "SX",
|
||||||
|
"Sudanese": "SD",
|
||||||
|
"Suisse": "CH",
|
||||||
|
"Suomi": "FI",
|
||||||
|
"Surinamer": "SR",
|
||||||
|
"Svalbard og Jan Mayen": "SJ",
|
||||||
|
"Sverige": "SE",
|
||||||
|
"Svizra": "CH",
|
||||||
|
"Svizzera": "CH",
|
||||||
|
"Swazi": "SZ",
|
||||||
|
"Swedish": "SE",
|
||||||
|
"Swiss": "CH",
|
||||||
|
"Syrian": "SY",
|
||||||
|
"São Tomé e Príncipe": "ST",
|
||||||
|
"Sénégal": "SN",
|
||||||
|
"Sāmoa": "WS",
|
||||||
|
"Sāmoa Amelika": "AS",
|
||||||
|
"Tadzhik": "TJ",
|
||||||
|
"Tadzhikistan": "TJ",
|
||||||
|
"Tai Wan": "TW",
|
||||||
|
"Taiwanese": "TW",
|
||||||
|
"Tanzania": "TZ",
|
||||||
|
"Tanzanian": "TZ",
|
||||||
|
"Tchad": "TD",
|
||||||
|
"Terres australes et antarctiques francaises": "TF",
|
||||||
|
"Terres australes et antarctiques françaises": "TF",
|
||||||
|
"Thai": "TH",
|
||||||
|
"Timor Leste": "TL",
|
||||||
|
"Timór Leste": "TL",
|
||||||
|
"Tochikiston": "TJ",
|
||||||
|
"Togolese": "TG",
|
||||||
|
"Tokelauan": "TK",
|
||||||
|
"Tongan": "TO",
|
||||||
|
"Trinidadian": "TT",
|
||||||
|
"Tsrna Gora": "ME",
|
||||||
|
"Tunisian": "TN",
|
||||||
|
"Turkish": "TR",
|
||||||
|
"Turkiye": "TR",
|
||||||
|
"Turkmen": "TM",
|
||||||
|
"Turkmeniia": "TM",
|
||||||
|
"Turks and Caicos Islander": "TC",
|
||||||
|
"Tuvaluan": "TV",
|
||||||
|
"Türkiye": "TR",
|
||||||
|
"Türkmenistan": "TM",
|
||||||
|
"UK": "GB",
|
||||||
|
"US": "US",
|
||||||
|
"Uburundi": "BI",
|
||||||
|
"Ugandan": "UG",
|
||||||
|
"Ukrainian": "UA",
|
||||||
|
"Ukrayina": "UA",
|
||||||
|
"United States Virgin Islands": "VI",
|
||||||
|
"Uruguayan": "UY",
|
||||||
|
"Uzbekistani": "UZ",
|
||||||
|
"Vatican": "VA",
|
||||||
|
"Vaticanae": "VA",
|
||||||
|
"Vaticano": "VA",
|
||||||
|
"Vaticanæ": "VA",
|
||||||
|
"Venezuela": "VE",
|
||||||
|
"Venezuelan": "VE",
|
||||||
|
"Vietnam": "VN",
|
||||||
|
"Vietnamese": "VN",
|
||||||
|
"Viti": "FJ",
|
||||||
|
"Việt Nam": "VN",
|
||||||
|
"Volivia": "BO",
|
||||||
|
"Volívia": "BO",
|
||||||
|
"Wallis and Futuna Islander": "WF",
|
||||||
|
"Wallis et Futuna": "WF",
|
||||||
|
"Wuliwya": "BO",
|
||||||
|
"Xiang Gang": "HK",
|
||||||
|
"Xin Jia Po": "SG",
|
||||||
|
"Yemeni": "YE",
|
||||||
|
"Zambian": "ZM",
|
||||||
|
"Zhong Guo": "CN",
|
||||||
|
"Zhong Guo Da Lu": "CN",
|
||||||
|
"Zimbabwean": "ZW",
|
||||||
|
"`mn": "OM",
|
||||||
|
"baaNlaadesh": "BD",
|
||||||
|
"bbaart nuuN": "IN",
|
||||||
|
"bhaart": "IN",
|
||||||
|
"brug-yul-": "BT",
|
||||||
|
"canadien": "CA",
|
||||||
|
"cingkppuur": "SG",
|
||||||
|
"dhivehiraajeyge": "MV",
|
||||||
|
"eSwatini": "SZ",
|
||||||
|
"eereteraa": "ER",
|
||||||
|
"fGnstn": "AF",
|
||||||
|
"flsTyn": "PS",
|
||||||
|
"hangug": "KR",
|
||||||
|
"ilngkai": "LK",
|
||||||
|
"intiyaa": "IN",
|
||||||
|
"joseon": "KP",
|
||||||
|
"jybwty": "DJ",
|
||||||
|
"khoemry": "IQ",
|
||||||
|
"lSwml": "SO",
|
||||||
|
"l`rq": "IQ",
|
||||||
|
"lbHryn": "BH",
|
||||||
|
"lbnn": "LB",
|
||||||
|
"ljzyr": "DZ",
|
||||||
|
"lkwyt": "KW",
|
||||||
|
"lmGrb": "MA",
|
||||||
|
"lqmr": "KM",
|
||||||
|
"lrdn": "JO",
|
||||||
|
"lswdn": "SD",
|
||||||
|
"lyaman": "YE",
|
||||||
|
"lyby": "LY",
|
||||||
|
"mSr": "EG",
|
||||||
|
"mlysy": "MY",
|
||||||
|
"mnmaa": "MM",
|
||||||
|
"mwrytny": "MR",
|
||||||
|
"nepaal": "NP",
|
||||||
|
"phijii": "FJ",
|
||||||
|
"pkstn": "PK",
|
||||||
|
"praethsaithy": "TH",
|
||||||
|
"qTr": "QA",
|
||||||
|
"qwutnA": "IQ",
|
||||||
|
"rtry": "ER",
|
||||||
|
"sak`art`velo": "GE",
|
||||||
|
"shrii lNkaav": "LK",
|
||||||
|
"spplaaw": "LA",
|
||||||
|
"sryyl": "IL",
|
||||||
|
"swry": "SY",
|
||||||
|
"teyopheyaa": "ET",
|
||||||
|
"tshd": "TD",
|
||||||
|
"twns": "TN",
|
||||||
|
"ySHrAl": "IL",
|
||||||
|
"yrn": "IR",
|
||||||
|
"Åland": "AX",
|
||||||
|
"Ålandish": "AX",
|
||||||
|
"Éire": "IE",
|
||||||
|
"Ísland": "IS",
|
||||||
|
"Österreich": "AT",
|
||||||
|
"Česko": "CZ",
|
||||||
|
"Ελλάδα": "GR",
|
||||||
|
"Κύπρος": "CY",
|
||||||
|
"Азербайджан": "AZ",
|
||||||
|
"Белару́сь": "BY",
|
||||||
|
"Беларусь": "BY",
|
||||||
|
"Боснa и Херцеговина": "BA",
|
||||||
|
"България": "BG",
|
||||||
|
"Казахстан": "KZ",
|
||||||
|
"Киргизия": "KG",
|
||||||
|
"Кыргызстан": "KG",
|
||||||
|
"Македонија": "MK",
|
||||||
|
"Македонски": "MK",
|
||||||
|
"Монгол улс": "MN",
|
||||||
|
"Россия": "RU",
|
||||||
|
"Србија": "RS",
|
||||||
|
"Таджикистан": "TJ",
|
||||||
|
"Тоҷикистон": "TJ",
|
||||||
|
"Туркмения": "TM",
|
||||||
|
"Узбекистан": "UZ",
|
||||||
|
"Україна": "UA",
|
||||||
|
"Црна Гора": "ME",
|
||||||
|
"Қазақстан": "KZ",
|
||||||
|
"Հայաստան": "AM",
|
||||||
|
"ישראל": "IL",
|
||||||
|
"إرتريا": "ER",
|
||||||
|
"إسرائيل": "IL",
|
||||||
|
"افغانستان": "AF",
|
||||||
|
"الأردن": "JO",
|
||||||
|
"البحرين": "BH",
|
||||||
|
"الجزائر": "DZ",
|
||||||
|
"السعودية": "SA",
|
||||||
|
"السودان": "SD",
|
||||||
|
"الصحراء الغربية": "EH",
|
||||||
|
"الصومال": "SO",
|
||||||
|
"العراق": "IQ",
|
||||||
|
"العربية السعودية": "SA",
|
||||||
|
"القمر": "KM",
|
||||||
|
"الكويت": "KW",
|
||||||
|
"المغرب": "MA",
|
||||||
|
"اليَمَن": "YE",
|
||||||
|
"ایران": "IR",
|
||||||
|
"تشاد": "TD",
|
||||||
|
"تونس": "TN",
|
||||||
|
"جيبوتي": "DJ",
|
||||||
|
"دولة الإمارات العربية المتحدة": "AE",
|
||||||
|
"سوريا": "SY",
|
||||||
|
"عمان": "OM",
|
||||||
|
"فلسطين": "PS",
|
||||||
|
"قطر": "QA",
|
||||||
|
"لبنان": "LB",
|
||||||
|
"ليبيا": "LY",
|
||||||
|
"مصر": "EG",
|
||||||
|
"مليسيا": "MY",
|
||||||
|
"موريتانيا": "MR",
|
||||||
|
"پاكستان": "PK",
|
||||||
|
"کۆماری": "IQ",
|
||||||
|
"ܩܘܼܛܢܵܐ": "IQ",
|
||||||
|
"ދިވެހިރާއްޖޭގެ": "MV",
|
||||||
|
"नेपाल": "NP",
|
||||||
|
"फिजी": "FJ",
|
||||||
|
"भारत": "IN",
|
||||||
|
"বাংলাদেশ": "BD",
|
||||||
|
"ভারত": "IN",
|
||||||
|
"ਭਾਰਤ ਨੂੰ": "IN",
|
||||||
|
"இந்தியா": "IN",
|
||||||
|
"இலங்கை": "LK",
|
||||||
|
"சிங்கப்பூர்": "SG",
|
||||||
|
"ශ්රී ලංකාව": "LK",
|
||||||
|
"ประเทศไทย": "TH",
|
||||||
|
"ສປປລາວ": "LA",
|
||||||
|
"འབྲུག་ཡུལ་": "BT",
|
||||||
|
"မြန်မာ": "MM",
|
||||||
|
"საქართველო": "GE",
|
||||||
|
"ኢትዮጵያ": "ET",
|
||||||
|
"ኤርትራ": "ER",
|
||||||
|
"ⵍⵎⴰⵖⵔⵉⴱ": "MA",
|
||||||
|
"中国": "CN",
|
||||||
|
"中国大陆": "CN",
|
||||||
|
"台灣": "TW",
|
||||||
|
"新加坡": "SG",
|
||||||
|
"日本": "JP",
|
||||||
|
"澳门": "MO",
|
||||||
|
"香港": "HK",
|
||||||
|
"조선": "KP",
|
||||||
|
"한국": "KR"
|
||||||
|
},
|
||||||
|
"ignored": [
|
||||||
|
"bit",
|
||||||
|
"cc",
|
||||||
|
"ch",
|
||||||
|
"dan",
|
||||||
|
"day",
|
||||||
|
"gun",
|
||||||
|
"hr",
|
||||||
|
"jordan",
|
||||||
|
"la",
|
||||||
|
"ma",
|
||||||
|
"na",
|
||||||
|
"the",
|
||||||
|
"to"
|
||||||
|
],
|
||||||
|
"implicit-languages": {
|
||||||
|
"419": "es-419",
|
||||||
|
"BR": "pt-BR",
|
||||||
|
"CA": "fr-CA",
|
||||||
|
"Cantonese": "zh",
|
||||||
|
"Castilian": "es",
|
||||||
|
"FR": "fr-FR",
|
||||||
|
"GR": "ell",
|
||||||
|
"HK": "zh-HK",
|
||||||
|
"ID": "id-ID",
|
||||||
|
"Mandarin": "zh",
|
||||||
|
"Parisian": "fr-FR",
|
||||||
|
"Simplified": "zh-Hans",
|
||||||
|
"Traditional": "zh-Hant",
|
||||||
|
"UA": "uk-UA",
|
||||||
|
"UK": "en-GB",
|
||||||
|
"US": "en-US",
|
||||||
|
"VFF": "fr-FR",
|
||||||
|
"VFQ": "fr-CA",
|
||||||
|
"VN": "vie",
|
||||||
|
"cant": "zh",
|
||||||
|
"eng": "en",
|
||||||
|
"ita": "it",
|
||||||
|
"简体双语": "zh-Hans",
|
||||||
|
"繁体双语": "zh-Hant"
|
||||||
|
},
|
||||||
|
"languages": {
|
||||||
|
"Adygebze": "ady",
|
||||||
|
"Avanee": "grn",
|
||||||
|
"Avañeẽ": "grn",
|
||||||
|
"Aymar aru": "aym",
|
||||||
|
"Azərbaycan dili": "aze",
|
||||||
|
"Bahasa Indonesia": "ind",
|
||||||
|
"Bahasa Melayu": "msa",
|
||||||
|
"Basa Jawa": "jav",
|
||||||
|
"Basa Sunda": "sun",
|
||||||
|
"Belaruskaia": "bel",
|
||||||
|
"Blgarski": "bul",
|
||||||
|
"Bosanski": "bos",
|
||||||
|
"Brezhoneg": "bre",
|
||||||
|
"Catala": "cat",
|
||||||
|
"Català": "cat",
|
||||||
|
"Cestina": "ces",
|
||||||
|
"Cymraeg": "cym",
|
||||||
|
"Dansk": "dan",
|
||||||
|
"Davvisamegiella": "sme",
|
||||||
|
"Davvisámegiella": "sme",
|
||||||
|
"Deutsch": "deu",
|
||||||
|
"Dolnoserbscina": "dsb",
|
||||||
|
"Dolnoserbšćina": "dsb",
|
||||||
|
"Eesti": "est",
|
||||||
|
"Ellenika": "ell",
|
||||||
|
"Espanol": "spa",
|
||||||
|
"Espanol Latinoamerica": "es-419",
|
||||||
|
"Español": "spa",
|
||||||
|
"Español Latinoamérica": "es-419",
|
||||||
|
"Euskara": "eus",
|
||||||
|
"Foroyskt": "fao",
|
||||||
|
"Francais": "fra",
|
||||||
|
"Français": "fra",
|
||||||
|
"Frysk": "fry",
|
||||||
|
"Føroyskt": "fao",
|
||||||
|
"Gaeilge": "gle",
|
||||||
|
"Gaelg": "glv",
|
||||||
|
"Gaidhlig": "gla",
|
||||||
|
"Galego": "glg",
|
||||||
|
"Greek": "ell",
|
||||||
|
"Guang Dong Hua ": "zho",
|
||||||
|
"Gàidhlig": "gla",
|
||||||
|
"Hayeren": "hye",
|
||||||
|
"Hornjoserbscina": "hsb",
|
||||||
|
"Hornjoserbšćina": "hsb",
|
||||||
|
"Hrvatski": "hrv",
|
||||||
|
"Islenska": "isl",
|
||||||
|
"Italiano": "ita",
|
||||||
|
"Kazaksha": "kaz",
|
||||||
|
"Kernewek": "cor",
|
||||||
|
"Kiswahili": "swa",
|
||||||
|
"Kreyol": "hat",
|
||||||
|
"Kreyòl": "hat",
|
||||||
|
"Kurdi": "kur",
|
||||||
|
"Kurdî": "kur",
|
||||||
|
"Latviesu": "lav",
|
||||||
|
"Latviešu": "lav",
|
||||||
|
"Lemborgs": "lim",
|
||||||
|
"Letzebuergesch": "ltz",
|
||||||
|
"Lietuviu": "lit",
|
||||||
|
"Lietuvių": "lit",
|
||||||
|
"Lwo": "ach",
|
||||||
|
"Lèmbörgs": "lim",
|
||||||
|
"Lëtzebuergesch": "ltz",
|
||||||
|
"Magyar": "hun",
|
||||||
|
"Makedonski": "mkd",
|
||||||
|
"Malay": "msa",
|
||||||
|
"Malti": "mlt",
|
||||||
|
"Maya Kaqchikel": "cak",
|
||||||
|
"Melayu": "msa",
|
||||||
|
"Mongol": "mon",
|
||||||
|
"Nederlands": "nld",
|
||||||
|
"Norsk": "nor",
|
||||||
|
"Norsk bokmal": "nob",
|
||||||
|
"Norsk bokmål": "nob",
|
||||||
|
"Norsk nynorsk": "nno",
|
||||||
|
"Occitan": "oci",
|
||||||
|
"Ozbek": "uzb",
|
||||||
|
"Polski": "pol",
|
||||||
|
"Portugues": "por",
|
||||||
|
"Português": "por",
|
||||||
|
"Qhichwa": "que",
|
||||||
|
"Ri Ben Yu": "jpn",
|
||||||
|
"Romana": "ron",
|
||||||
|
"Română": "ron",
|
||||||
|
"Rumantsch": "roh",
|
||||||
|
"Russkii": "rus",
|
||||||
|
"Shqip": "sqi",
|
||||||
|
"Slovencina": "slk",
|
||||||
|
"Slovenscina": "slv",
|
||||||
|
"Slovenčina": "slk",
|
||||||
|
"Slovenščina": "slv",
|
||||||
|
"Soomaaliga": "som",
|
||||||
|
"Srpski": "srp",
|
||||||
|
"Suomi": "fin",
|
||||||
|
"Svenska": "swe",
|
||||||
|
"Taqbaylit": "kab",
|
||||||
|
"TcYi": "aka",
|
||||||
|
"Tieng Viet": "vie",
|
||||||
|
"Tiếng Việt": "vie",
|
||||||
|
"Turkce": "tur",
|
||||||
|
"Türkçe": "tur",
|
||||||
|
"Tɕɥi": "aka",
|
||||||
|
"Ukrayinska": "ukr",
|
||||||
|
"Zhong Wen": "zho",
|
||||||
|
"Zhong Wen Fan Ti": "zh-Hant",
|
||||||
|
"Zhong Wen Jian Ti": "zh-Hans",
|
||||||
|
"`bryt": "heb",
|
||||||
|
"aithy": "tha",
|
||||||
|
"baaNlaa": "ben",
|
||||||
|
"bhaasaakhmaer": "khm",
|
||||||
|
"bmaackaa": "mya",
|
||||||
|
"eesti keel": "est",
|
||||||
|
"frsy": "fas",
|
||||||
|
"gujraatii": "guj",
|
||||||
|
"hangugeo": "kor",
|
||||||
|
"hindii": "hin",
|
||||||
|
"isiXhosa": "xho",
|
||||||
|
"isiZulu": "zul",
|
||||||
|
"k`art`uli": "kat",
|
||||||
|
"knndd": "kan",
|
||||||
|
"maithilii maithilii": "mai",
|
||||||
|
"mlyaallN": "mal",
|
||||||
|
"mraatthii": "mar",
|
||||||
|
"nepaalii": "nep",
|
||||||
|
"oddiaa": "ori",
|
||||||
|
"pNjaabii": "pan",
|
||||||
|
"pStw": "pus",
|
||||||
|
"phaasaaaithy": "tha",
|
||||||
|
"rdw": "urd",
|
||||||
|
"sNskRtm": "san",
|
||||||
|
"siNhl": "sin",
|
||||||
|
"srpskokhrvatski": "hbs",
|
||||||
|
"tatarcha": "tat",
|
||||||
|
"telugu": "tel",
|
||||||
|
"tlhIngan Hol": "tlh",
|
||||||
|
"tmilll": "tam",
|
||||||
|
"tochiki": "tgk",
|
||||||
|
"yyidySH": "yid",
|
||||||
|
"zaboni tochiki": "tgk",
|
||||||
|
"Íslenska": "isl",
|
||||||
|
"Čeština": "ces",
|
||||||
|
"Ελληνικά": "ell",
|
||||||
|
"Адыгэбзэ": "ady",
|
||||||
|
"Беларуская": "bel",
|
||||||
|
"Български": "bul",
|
||||||
|
"Македонски": "mkd",
|
||||||
|
"Монгол": "mon",
|
||||||
|
"Русский": "rus",
|
||||||
|
"Српски": "srp",
|
||||||
|
"Українська": "ukr",
|
||||||
|
"забо́ни тоҷикӣ́": "tgk",
|
||||||
|
"српскохрватски": "hbs",
|
||||||
|
"татарча": "tat",
|
||||||
|
"тоҷикӣ": "tgk",
|
||||||
|
"Қазақша": "kaz",
|
||||||
|
"Հայերեն": "hye",
|
||||||
|
"ייִדיש": "yid",
|
||||||
|
"עברית": "heb",
|
||||||
|
"اردو": "urd",
|
||||||
|
"العربية": "ara",
|
||||||
|
"فارسی": "fas",
|
||||||
|
"پښتو": "pus",
|
||||||
|
"नेपाली": "nep",
|
||||||
|
"मराठी": "mar",
|
||||||
|
"मैथिली মৈথিলী": "mai",
|
||||||
|
"संस्कृतम्": "san",
|
||||||
|
"हिन्दी": "hin",
|
||||||
|
"বাংলা": "ben",
|
||||||
|
"ਪੰਜਾਬੀ": "pan",
|
||||||
|
"ગુજરાતી": "guj",
|
||||||
|
"ଓଡ଼ିଆ": "ori",
|
||||||
|
"தமிழ்": "tam",
|
||||||
|
"తెలుగు": "tel",
|
||||||
|
"ಕನ್ನಡ": "kan",
|
||||||
|
"മലയാളം": "mal",
|
||||||
|
"සිංහල": "sin",
|
||||||
|
"ภาษาไทย": "tha",
|
||||||
|
"ไทย": "tha",
|
||||||
|
"ဗမာစကာ": "mya",
|
||||||
|
"ქართული": "kat",
|
||||||
|
"ភាសាខ្មែរ": "khm",
|
||||||
|
"中文": "zho",
|
||||||
|
"中文简体": "zh-Hans",
|
||||||
|
"中文繁體": "zh-Hant",
|
||||||
|
"廣東話": "zho",
|
||||||
|
"日本語": "jpn",
|
||||||
|
"한국어": "kor"
|
||||||
|
},
|
||||||
|
"regions": {
|
||||||
|
"Latin": "419",
|
||||||
|
"Latinoamerica": "419",
|
||||||
|
"Latinoamericano": "419",
|
||||||
|
"Latinoamérica": "419"
|
||||||
|
},
|
||||||
|
"scripts": {
|
||||||
|
"Fan Ti ": "Hant",
|
||||||
|
"Jian Ti ": "Hans",
|
||||||
|
"Simplified": "Hans",
|
||||||
|
"Traditional": "Hant",
|
||||||
|
"简体": "Hans",
|
||||||
|
"繁體": "Hant"
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,169 @@
|
||||||
|
import typing
|
||||||
|
|
||||||
|
from babelfish import (
|
||||||
|
COUNTRIES,
|
||||||
|
Country,
|
||||||
|
CountryReverseError,
|
||||||
|
LANGUAGE_MATRIX,
|
||||||
|
Language,
|
||||||
|
LanguageReverseError,
|
||||||
|
SCRIPTS,
|
||||||
|
Script,
|
||||||
|
country_converters,
|
||||||
|
language_converters
|
||||||
|
)
|
||||||
|
from babelfish.converters import CaseInsensitiveDict
|
||||||
|
|
||||||
|
from rebulk import Rebulk
|
||||||
|
from rebulk.match import Match
|
||||||
|
|
||||||
|
from trakit.config import Config
|
||||||
|
from trakit.context import Context
|
||||||
|
from trakit.converters.country import GuessCountryConverter
|
||||||
|
from trakit.converters.language import GuessLanguageConverter
|
||||||
|
from trakit.words import blank_match, blank_release_names, to_combinations, to_match, to_sentence, to_words
|
||||||
|
|
||||||
|
|
||||||
|
class LanguageFinder:
    """Locate a language (optionally with country/script/region) inside a free-form string."""

    def __init__(self, config: Config):
        """Compute the combination sizes and register the guess converters.

        Side effects: adds the ``'419'`` entry to babelfish ``SCRIPTS`` and
        registers ``'guess'`` converters for countries and languages.
        """
        # NOTE(review): these values count spaces (floored at 1), i.e. one less than
        # the number of words for multi-word names -- confirm this is intended.
        self.country_max_words = max([1] + [name.count(' ') for name in COUNTRIES.values()])
        self.language_max_words = max([1] + [entry.name.count(' ') for entry in LANGUAGE_MATRIX])
        self.script_max_words = max([1] + [key.count(' ') for key in config.scripts])
        self.region_max_words = max([1] + [key.count(' ') for key in config.regions])

        SCRIPTS['419'] = 'Latin America and the Caribbean'  # Until babelfish support UN.M49
        country_converters['guess'] = GuessCountryConverter(config.countries)
        language_converters['guess'] = GuessLanguageConverter(config.languages)

        self.regions = CaseInsensitiveDict(config.regions)
        self.scripts = CaseInsensitiveDict(config.scripts)
        self.common_words = CaseInsensitiveDict(dict.fromkeys(config.ignored, 0))
        self.implicit = CaseInsensitiveDict(config.implicit_languages)

    def _find_country(self, value: str):
        """Return a match for the first word combination that guesses to a country, else ``None``."""
        for words in to_combinations(to_words(value), self.country_max_words):
            candidate = to_sentence(words)
            try:
                guessed = Country.fromguess(candidate)
            except CountryReverseError:
                continue

            return to_match(words, guessed)

        return None

    def _find_script(self, value: str):
        """Return a match for the first word combination that is a valid script, else ``None``."""
        for words in to_combinations(to_words(value), self.script_max_words):
            candidate = to_sentence(words)
            try:
                # Configured aliases are translated to a script code first.
                found = Script(self.scripts.get(candidate, candidate))
            except ValueError:
                continue

            return to_match(words, found)

        return None

    def _find_region(self, value: str):
        """Return a match for the first word combination that is a known region, else ``None``.

        Regions are represented as ``Script`` instances (see the ``'419'`` entry
        added to ``SCRIPTS`` in ``__init__``).
        """
        for words in to_combinations(to_words(value), self.region_max_words):
            candidate = to_sentence(words)
            try:
                found = Script(self.regions.get(candidate, candidate))
            except ValueError:
                continue

            return to_match(words, found)

        return None

    def _find_implicit_language(self, combinations: typing.List[typing.List[Match]]):
        """Return a match for the first combination that implies a language, else ``None``.

        Each combination is tried, in order, as a direct key of the implicit
        mapping, as a region whose code is in the mapping, and as a country
        whose alpha2 code is in the mapping.
        """
        for words in combinations:
            sentence = to_sentence(words)
            if sentence in self.implicit:
                return to_match(words, Language.fromietf(self.implicit[sentence]))

            region = self._find_region(sentence)
            if region and region.value.code in self.implicit:
                implied = Language.fromietf(self.implicit[region.value.code])
                return Match(region.start, region.end, value=implied, input_string=region.input_string)

            try:
                country = Country.fromguess(sentence)
                if country.alpha2 in self.implicit:
                    implied = Language.fromietf(self.implicit[country.alpha2])
                    # When the text is the language name itself, use the plain language.
                    if implied.name.lower() == sentence.lower():
                        implied = Language.fromname(sentence)

                    return to_match(words, implied)
            except CountryReverseError:
                pass

        return None

    def accept_word(self, string: str):
        """Return whether ``string`` is neither an ignored common word nor numeric."""
        if string.lower() in self.common_words:
            return False

        return not string.isnumeric()

    def find_language(self, value: str, context: Context):
        """Return a match holding the language found in ``value``, or ``None``.

        Release names are blanked first. An implicit language is returned
        directly when the context accepts it and its script code is numeric;
        when the context rejects it, its span is blanked and the words are
        recomputed. Each remaining combination is then guessed as a language
        and refined with a country, region or script found in the rest of the
        string.
        """
        value = blank_release_names(value)
        combinations = to_combinations(to_words(value, predicate=self.accept_word), self.language_max_words)
        implicit_lang = self._find_implicit_language(combinations)
        implicit_accepted = implicit_lang and context.accept(implicit_lang.value)

        if implicit_accepted and implicit_lang.value.script and implicit_lang.value.script.code.isnumeric():
            return implicit_lang
        elif implicit_lang and not implicit_accepted:
            # The implicit language was rejected: hide it and split the words again.
            value = blank_match(implicit_lang)
            combinations = to_combinations(to_words(value, predicate=self.accept_word), self.language_max_words)

        for words in combinations:
            try:
                lang = Language.fromguess(to_sentence(words))
            except LanguageReverseError:
                continue

            # Look for qualifiers in the text outside the matched language span.
            leftover = blank_match(to_match(words, lang))
            for extra in to_combinations(to_words(leftover), self.country_max_words):
                sentence = to_sentence(extra)
                country = self._find_country(sentence)
                if country:
                    try:
                        # discard country if value is actually the language name
                        Language.fromguess(country.raw)
                    except LanguageReverseError:
                        lang = Language(lang.alpha3, country=country.value, script=lang.script)
                        break

                region = self._find_region(sentence)
                if region:
                    lang = Language(lang.alpha3, country=lang.country, script=region.value)
                    break

                script = self._find_script(sentence)
                if script:
                    lang = Language(lang.alpha3, country=lang.country, script=script.value)
                    break

            if implicit_accepted and implicit_lang.value.alpha3 == lang.alpha3 and not lang.country and not lang.script:
                return implicit_lang

            if context.accept(lang):
                return to_match(words, lang)

        if implicit_accepted:
            return implicit_lang

        return None

    def find(self, value: str, context: Context):
        """Return ``(start, end, {'value': language})`` for the language found, or ``None``."""
        found = self.find_language(value, context)
        if not found:
            return None

        return found.start, found.end, {'value': found.value}
|
||||||
|
|
||||||
|
|
||||||
|
def language(config: Config):
    """Build a Rebulk whose ``language`` rule is backed by ``LanguageFinder.find``."""
    finder = LanguageFinder(config)
    language_rebulk = Rebulk()
    language_rebulk.functional(finder.find, name='language')

    return language_rebulk
|
|
@ -0,0 +1,32 @@
|
||||||
|
import re
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
|
from rebulk import Rebulk
|
||||||
|
from rebulk.validators import chars_surround
|
||||||
|
|
||||||
|
from trakit.config import Config
|
||||||
|
from trakit.language import language
|
||||||
|
from trakit.words import seps
|
||||||
|
|
||||||
|
|
||||||
|
def configure(config: Config):
    """Build the Rebulk combining the language rules with the track flag rules."""
    seps_surround = partial(chars_surround, seps)

    # Literal keywords, registered in order as (text, property name, value).
    string_rules = (
        ('forced', 'forced', True),
        ('commentary', 'commentary', True),
        ('external', 'external', True),
        ('sdh', 'hearing_impaired', True),
        ('alternate', 'version', 'alternate'),
        ('descriptive', 'descriptive', True),
    )

    others = Rebulk()
    others.defaults(ignore_case=True, validator=seps_surround)
    others.regex_defaults(
        flags=re.IGNORECASE,
        # A '-' in a regex pattern stands for any separator character.
        abbreviations=[(r'-', rf'[{re.escape("".join(seps))}]')],
        validator=seps_surround,
    )
    for text, prop, prop_value in string_rules:
        others.string(text, name=prop, value=prop_value)

    others.regex('cc', 'closed-captions?', name='closed_caption', value=True)

    combined = Rebulk()
    combined.rebulk(language(config))
    combined.rebulk(others)

    return combined
|
|
@ -0,0 +1,99 @@
|
||||||
|
import re
|
||||||
|
import typing
|
||||||
|
|
||||||
|
from rebulk.match import Match
|
||||||
|
|
||||||
|
# Characters treated as word separators: space, ASCII brackets and punctuation,
# plus the fullwidth parentheses U+FF08 / U+FF09.
seps = frozenset(r' [](){}+*|=-_~#/\\.,;:' + '\uff08\uff09')
# Characters dropped from a word without splitting it (default ``ignore_chars`` of ``to_words``).
suppress_chars = frozenset("'")
# Matches a run of at least three dot-separated tokens, each free of dots and
# whitespace (e.g. ``a.b.c``), captured as the ``release`` group.
release_name_re = re.compile(r'(?P<release>[^\.\s]+(?:\.[^\.\s]+){2,})')
|
||||||
|
|
||||||
|
|
||||||
|
def to_words(value: str,
             separators: typing.FrozenSet[str] = seps,
             ignore_chars: typing.FrozenSet[str] = suppress_chars,
             predicate: typing.Callable[[str], bool] = lambda x: True):
    """Split ``value`` into word matches.

    Characters in ``ignore_chars`` are removed from the word text but still
    count for positions; characters in ``separators`` end the current word.
    A word whose raw slice is rejected by ``predicate`` is not returned and
    its span is replaced by spaces in the ``input_string`` shared by the
    returned matches.
    """
    blanked = value
    found: typing.List[Match] = []
    letters: typing.List[str] = []
    word_start = 0

    for index, char in enumerate(value):
        if char in ignore_chars:
            continue

        if char not in separators:
            letters.append(char)
            continue

        # ``char`` is a separator: close the pending word, if any.
        if letters:
            if predicate(value[word_start:index]):
                found.append(Match(word_start, index, value=''.join(letters)))
            else:
                blanked = blank(blanked, word_start, index)
            letters = []

        word_start = index + 1

    # Flush the word that runs up to the end of the string.
    if letters:
        if predicate(value[word_start:]):
            found.append(Match(word_start, len(value), value=''.join(letters)))
        else:
            blanked = blank(blanked, word_start, len(blanked))

    for word_match in found:
        word_match.input_string = blanked

    return found
|
||||||
|
|
||||||
|
|
||||||
|
def to_combinations(words: typing.List[Match], max_items: int):
    """Return every run of consecutive words, longest runs first.

    Run sizes go from ``min(max_items, len(words))`` down to 1; within a
    size, runs are ordered by their starting position.
    """
    total = len(words)
    return [
        words[offset:offset + size]
        for size in range(min(max_items, total), 0, -1)
        for offset in range(total - size + 1)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def to_sentence(combination: typing.List[Match]):
    """Join the values of the given matches with single spaces."""
    return ' '.join(word.value for word in combination)
|
||||||
|
|
||||||
|
|
||||||
|
def to_match(combination: typing.List[Match], value: typing.Any):
    """Build one match spanning ``combination`` (first start to last end) carrying ``value``."""
    first, last = combination[0], combination[-1]

    return Match(first.start, last.end, value=value, input_string=first.input_string)
|
||||||
|
|
||||||
|
|
||||||
|
def blank(string: str, start: int, end: int):
    """Return ``string`` with the characters in ``[start, end)`` replaced by spaces."""
    padding = ' ' * (end - start)
    return f'{string[:start]}{padding}{string[end:]}'
|
||||||
|
|
||||||
|
|
||||||
|
def blank_match(match: Match):
    """Return the match's input string with the matched span replaced by spaces."""
    text, span_start, span_end = match.input_string, match.start, match.end
    return blank(text, span_start, span_end)
|
||||||
|
|
||||||
|
|
||||||
|
def blank_release_names(value: str):
    """Return ``value`` with every span matched by ``release_name_re`` replaced by spaces."""
    cleaned = value
    # Matches are located in the original text; blanking keeps the length,
    # so their positions remain valid in ``cleaned``.
    for found in release_name_re.finditer(value):
        cleaned = blank(cleaned, found.start('release'), found.end('release'))

    return cleaned
|
|
@ -17,7 +17,7 @@ ga4mp==2.0.4
|
||||||
guess_language-spirit==0.5.3
|
guess_language-spirit==0.5.3
|
||||||
guessit==3.5.0
|
guessit==3.5.0
|
||||||
jsonschema==4.17.0
|
jsonschema==4.17.0
|
||||||
knowit==0.4.0
|
knowit==0.5.2
|
||||||
peewee==3.15.3
|
peewee==3.15.3
|
||||||
py-pretty==1
|
py-pretty==1
|
||||||
pycountry==22.3.5
|
pycountry==22.3.5
|
||||||
|
@ -80,8 +80,9 @@ zipp==3.10.0
|
||||||
markupsafe==2.1.1
|
markupsafe==2.1.1
|
||||||
|
|
||||||
# Required-by: knowit
|
# Required-by: knowit
|
||||||
pymediainfo==5.1.0
|
pymediainfo==6.0.1
|
||||||
pyyaml==6.0
|
pyyaml==6.0
|
||||||
|
trakit==0.2.1
|
||||||
|
|
||||||
# Required-by: python-socketio
|
# Required-by: python-socketio
|
||||||
bidict==0.22.0
|
bidict==0.22.0
|
||||||
|
|
Loading…
Reference in New Issue