bazarr/libs/chardet/enums.py

83 lines
1.6 KiB
Python
Raw Normal View History

"""
All of the Enums that are used throughout the chardet package.
:author: Dan Blanchard (dan.blanchard@gmail.com)
"""
2022-11-07 18:06:49 +00:00
class InputState:
    """
    This enum represents the different states a universal detector can be in.
    """

    # Plain integer class attributes (not an ``enum.Enum``), so callers can
    # compare and store them as ordinary ints.
    PURE_ASCII = 0
    ESC_ASCII = 1
    HIGH_BYTE = 2
2022-11-07 18:06:49 +00:00
class LanguageFilter:
    """
    This enum represents the different language filters we can apply to a
    ``UniversalDetector``.
    """

    # Single-bit flags; each occupies its own bit so they can be OR-ed together.
    CHINESE_SIMPLIFIED = 0x01
    CHINESE_TRADITIONAL = 0x02
    JAPANESE = 0x04
    KOREAN = 0x08
    NON_CJK = 0x10

    # Composite masks built from the single-bit flags above.
    # ALL has every one of the five flag bits set (0x01 | ... | 0x10 == 0x1F).
    ALL = 0x1F
    CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL  # 0x03
    CJK = CHINESE | JAPANESE | KOREAN  # 0x0F
2022-11-07 18:06:49 +00:00
class ProbingState:
    """
    This enum represents the different states a prober can be in.
    """

    # Plain integer class attributes (not an ``enum.Enum``).
    DETECTING = 0
    FOUND_IT = 1
    NOT_ME = 2
2022-11-07 18:06:49 +00:00
class MachineState:
    """
    This enum represents the different states a state machine can be in.
    """

    # Plain integer class attributes (not an ``enum.Enum``).
    START = 0
    ERROR = 1
    ITS_ME = 2
2022-11-07 18:06:49 +00:00
class SequenceLikelihood:
    """
    This enum represents the likelihood of a character following the previous one.
    """

    # Plain integer class attributes, numbered consecutively from 0.
    NEGATIVE = 0
    UNLIKELY = 1
    LIKELY = 2
    POSITIVE = 3

    @classmethod
    def get_num_categories(cls) -> int:
        """:returns: The number of likelihood categories in the enum."""
        # Literal count of the four constants declared above
        # (NEGATIVE, UNLIKELY, LIKELY, POSITIVE); update it if one is added.
        return 4
2022-11-07 18:06:49 +00:00
class CharacterCategory:
    """
    This enum represents the different categories language models for
    ``SingleByteCharsetProber`` put characters into.

    Anything less than CONTROL is considered a letter.
    """

    # Non-letter categories take the top of the 0-255 range, counting down
    # from 255; CONTROL (251) is the lowest of them.
    UNDEFINED = 255
    LINE_BREAK = 254
    SYMBOL = 253
    DIGIT = 252
    CONTROL = 251