""" | |
All of the Enums that are used throughout the chardet package.

:author: Dan Blanchard ([email protected])
"""
class InputState:
    """
    Enumerates the states a universal detector can be in.

    The members are plain integers numbered consecutively from zero.
    """

    PURE_ASCII, ESC_ASCII, HIGH_BYTE = range(3)
class LanguageFilter:
    """
    Enumerates the language filters that can be applied to a
    ``UniversalDetector``.

    Each basic filter occupies its own bit, so filters can be combined with
    bitwise OR; the composite members below are built exactly that way.
    """

    # Basic, single-bit filters.
    CHINESE_SIMPLIFIED = 1 << 0
    CHINESE_TRADITIONAL = 1 << 1
    JAPANESE = 1 << 2
    KOREAN = 1 << 3
    NON_CJK = 1 << 4

    # Composite filters, expressed in terms of the basic ones.
    CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
    CJK = CHINESE | JAPANESE | KOREAN
    ALL = CJK | NON_CJK
class ProbingState:
    """
    Enumerates the states a prober can be in.

    The members are plain integers numbered consecutively from zero.
    """

    DETECTING, FOUND_IT, NOT_ME = range(3)
class MachineState:
    """
    Enumerates the states a state machine can be in.

    The members are plain integers numbered consecutively from zero.
    """

    START, ERROR, ITS_ME = range(3)
class SequenceLikelihood:
    """
    This enum represents the likelihood of a character following the previous
    one.

    The members are consecutive integers starting at zero, ordered from
    ``NEGATIVE`` up to ``POSITIVE``.
    """

    NEGATIVE = 0
    UNLIKELY = 1
    LIKELY = 2
    POSITIVE = 3

    # Declared as a classmethod so it can be called directly on the class
    # (``SequenceLikelihood.get_num_categories()``); without the decorator
    # that call fails because ``cls`` is never bound.
    @classmethod
    def get_num_categories(cls):
        """:returns: The number of likelihood categories in the enum."""
        # Categories are numbered 0..POSITIVE, so the count is one more than
        # the highest member (currently 4).
        return cls.POSITIVE + 1
class CharacterCategory:
    """
    Enumerates the categories that language models for
    ``SingleByteCharsetProber`` sort characters into.

    Any value below ``CONTROL`` is considered a letter.
    """

    # Listed in ascending order; the values fill the top of the byte range.
    CONTROL = 0xFB
    DIGIT = 0xFC
    SYMBOL = 0xFD
    LINE_BREAK = 0xFE
    UNDEFINED = 0xFF