# Mirror of https://github.com/morpheus65535/bazarr
# (synced 2024-12-29 02:55:52 +00:00)
# 465 lines, 18 KiB, Python
# Integer identifiers for Unicode blocks, numbered consecutively from 1.
# These are the values stored in the first field of each `_unicode_blocks`
# entry and returned by `unicode_block()`.
UNICODE_BASIC_LATIN = 1
UNICODE_LATIN_1_SUPPLEMENT = 2
UNICODE_LATIN_EXTENDED_A = 3
UNICODE_LATIN_EXTENDED_B = 4
UNICODE_IPA_EXTENSIONS = 5
UNICODE_SPACING_MODIFIER_LETTERS = 6
UNICODE_COMBINING_DIACRITICAL_MARKS = 7
UNICODE_GREEK_AND_COPTIC = 8
UNICODE_CYRILLIC = 9
UNICODE_CYRILLIC_SUPPLEMENT = 10
UNICODE_ARMENIAN = 11
UNICODE_HEBREW = 12
UNICODE_ARABIC = 13
UNICODE_SYRIAC = 14
UNICODE_ARABIC_SUPPLEMENT = 15
UNICODE_THAANA = 16
UNICODE_NKO = 17
UNICODE_SAMARITAN = 18
UNICODE_MANDAIC = 19
UNICODE_ARABIC_EXTENDED_A = 20
UNICODE_DEVANAGARI = 21
UNICODE_BENGALI = 22
UNICODE_GURMUKHI = 23
UNICODE_GUJARATI = 24
UNICODE_ORIYA = 25
UNICODE_TAMIL = 26
UNICODE_TELUGU = 27
UNICODE_KANNADA = 28
UNICODE_MALAYALAM = 29
UNICODE_SINHALA = 30
UNICODE_THAI = 31
UNICODE_LAO = 32
UNICODE_TIBETAN = 33
UNICODE_MYANMAR = 34
UNICODE_GEORGIAN = 35
UNICODE_HANGUL_JAMO = 36
UNICODE_ETHIOPIC = 37
UNICODE_ETHIOPIC_SUPPLEMENT = 38
UNICODE_CHEROKEE = 39
UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 40
UNICODE_OGHAM = 41
UNICODE_RUNIC = 42
UNICODE_TAGALOG = 43
UNICODE_HANUNOO = 44
UNICODE_BUHID = 45
UNICODE_TAGBANWA = 46
UNICODE_KHMER = 47
UNICODE_MONGOLIAN = 48
UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 49
UNICODE_LIMBU = 50
UNICODE_TAI_LE = 51
UNICODE_NEW_TAI_LUE = 52
UNICODE_KHMER_SYMBOLS = 53
UNICODE_BUGINESE = 54
UNICODE_TAI_THAM = 55
UNICODE_BALINESE = 56
UNICODE_SUNDANESE = 57
UNICODE_BATAK = 58
UNICODE_LEPCHA = 59
UNICODE_OL_CHIKI = 60
UNICODE_SUNDANESE_SUPPLEMENT = 61
UNICODE_VEDIC_EXTENSIONS = 62
UNICODE_PHONETIC_EXTENSIONS = 63
UNICODE_PHONETIC_EXTENSIONS_SUPPLEMENT = 64
UNICODE_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 65
UNICODE_LATIN_EXTENDED_ADDITIONAL = 66
UNICODE_GREEK_EXTENDED = 67
UNICODE_GENERAL_PUNCTUATION = 68
UNICODE_SUPERSCRIPTS_AND_SUBSCRIPTS = 69
UNICODE_CURRENCY_SYMBOLS = 70
UNICODE_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS = 71
UNICODE_LETTERLIKE_SYMBOLS = 72
UNICODE_NUMBER_FORMS = 73
UNICODE_ARROWS = 74
UNICODE_MATHEMATICAL_OPERATORS = 75
UNICODE_MISCELLANEOUS_TECHNICAL = 76
UNICODE_CONTROL_PICTURES = 77
UNICODE_OPTICAL_CHARACTER_RECOGNITION = 78
UNICODE_ENCLOSED_ALPHANUMERICS = 79
UNICODE_BOX_DRAWING = 80
UNICODE_BLOCK_ELEMENTS = 81
UNICODE_GEOMETRIC_SHAPES = 82
UNICODE_MISCELLANEOUS_SYMBOLS = 83
UNICODE_DINGBATS = 84
UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 85
UNICODE_SUPPLEMENTAL_ARROWS_A = 86
UNICODE_BRAILLE_PATTERNS = 87
UNICODE_SUPPLEMENTAL_ARROWS_B = 88
UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 89
UNICODE_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 90
UNICODE_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 91
UNICODE_GLAGOLITIC = 92
UNICODE_LATIN_EXTENDED_C = 93
UNICODE_COPTIC = 94
UNICODE_GEORGIAN_SUPPLEMENT = 95
UNICODE_TIFINAGH = 96
UNICODE_ETHIOPIC_EXTENDED = 97
UNICODE_CYRILLIC_EXTENDED_A = 98
UNICODE_SUPPLEMENTAL_PUNCTUATION = 99
UNICODE_CJK_RADICALS_SUPPLEMENT = 100
UNICODE_KANGXI_RADICALS = 101
UNICODE_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 102
UNICODE_CJK_SYMBOLS_AND_PUNCTUATION = 103
UNICODE_HIRAGANA = 104
UNICODE_KATAKANA = 105
UNICODE_BOPOMOFO = 106
UNICODE_HANGUL_COMPATIBILITY_JAMO = 107
UNICODE_KANBUN = 108
UNICODE_BOPOMOFO_EXTENDED = 109
UNICODE_CJK_STROKES = 110
UNICODE_KATAKANA_PHONETIC_EXTENSIONS = 111
UNICODE_ENCLOSED_CJK_LETTERS_AND_MONTHS = 112
UNICODE_CJK_COMPATIBILITY = 113
UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 114
UNICODE_YIJING_HEXAGRAM_SYMBOLS = 115
UNICODE_CJK_UNIFIED_IDEOGRAPHS = 116
UNICODE_YI_SYLLABLES = 117
UNICODE_YI_RADICALS = 118
UNICODE_LISU = 119
UNICODE_VAI = 120
UNICODE_CYRILLIC_EXTENDED_B = 121
UNICODE_BAMUM = 122
UNICODE_MODIFIER_TONE_LETTERS = 123
UNICODE_LATIN_EXTENDED_D = 124
UNICODE_SYLOTI_NAGRI = 125
UNICODE_COMMON_INDIC_NUMBER_FORMS = 126
UNICODE_PHAGS_PA = 127
UNICODE_SAURASHTRA = 128
UNICODE_DEVANAGARI_EXTENDED = 129
UNICODE_KAYAH_LI = 130
UNICODE_REJANG = 131
UNICODE_HANGUL_JAMO_EXTENDED_A = 132
UNICODE_JAVANESE = 133
UNICODE_CHAM = 134
UNICODE_MYANMAR_EXTENDED_A = 135
UNICODE_TAI_VIET = 136
UNICODE_MEETEI_MAYEK_EXTENSIONS = 137
UNICODE_ETHIOPIC_EXTENDED_A = 138
UNICODE_MEETEI_MAYEK = 139
UNICODE_HANGUL_SYLLABLES = 140
UNICODE_HANGUL_JAMO_EXTENDED_B = 141
UNICODE_HIGH_SURROGATES = 142
UNICODE_HIGH_PRIVATE_USE_SURROGATES = 143
UNICODE_LOW_SURROGATES = 144
UNICODE_PRIVATE_USE_AREA = 145
UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS = 146
UNICODE_ALPHABETIC_PRESENTATION_FORMS = 147
UNICODE_ARABIC_PRESENTATION_FORMS_A = 148
UNICODE_VARIATION_SELECTORS = 149
UNICODE_VERTICAL_FORMS = 150
UNICODE_COMBINING_HALF_MARKS = 151
UNICODE_CJK_COMPATIBILITY_FORMS = 152
UNICODE_SMALL_FORM_VARIANTS = 153
UNICODE_ARABIC_PRESENTATION_FORMS_B = 154
UNICODE_HALFWIDTH_AND_FULLWIDTH_FORMS = 155
UNICODE_SPECIALS = 156
UNICODE_LINEAR_B_SYLLABARY = 157
UNICODE_LINEAR_B_IDEOGRAMS = 158
UNICODE_AEGEAN_NUMBERS = 159
UNICODE_ANCIENT_GREEK_NUMBERS = 160
UNICODE_ANCIENT_SYMBOLS = 161
UNICODE_PHAISTOS_DISC = 162
UNICODE_LYCIAN = 163
UNICODE_CARIAN = 164
UNICODE_OLD_ITALIC = 165
UNICODE_GOTHIC = 166
UNICODE_UGARITIC = 167
UNICODE_OLD_PERSIAN = 168
UNICODE_DESERET = 169
UNICODE_SHAVIAN = 170
UNICODE_OSMANYA = 171
UNICODE_CYPRIOT_SYLLABARY = 172
UNICODE_IMPERIAL_ARAMAIC = 173
UNICODE_PHOENICIAN = 174
UNICODE_LYDIAN = 175
UNICODE_MEROITIC_HIEROGLYPHS = 176
UNICODE_MEROITIC_CURSIVE = 177
UNICODE_KHAROSHTHI = 178
UNICODE_OLD_SOUTH_ARABIAN = 179
UNICODE_AVESTAN = 180
UNICODE_INSCRIPTIONAL_PARTHIAN = 181
UNICODE_INSCRIPTIONAL_PAHLAVI = 182
UNICODE_OLD_TURKIC = 183
UNICODE_RUMI_NUMERAL_SYMBOLS = 184
UNICODE_BRAHMI = 185
UNICODE_KAITHI = 186
UNICODE_SORA_SOMPENG = 187
UNICODE_CHAKMA = 188
UNICODE_SHARADA = 189
UNICODE_TAKRI = 190
UNICODE_CUNEIFORM = 191
UNICODE_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 192
UNICODE_EGYPTIAN_HIEROGLYPHS = 193
UNICODE_BAMUM_SUPPLEMENT = 194
UNICODE_MIAO = 195
UNICODE_KANA_SUPPLEMENT = 196
UNICODE_BYZANTINE_MUSICAL_SYMBOLS = 197
UNICODE_MUSICAL_SYMBOLS = 198
UNICODE_ANCIENT_GREEK_MUSICAL_NOTATION = 199
UNICODE_TAI_XUAN_JING_SYMBOLS = 200
UNICODE_COUNTING_ROD_NUMERALS = 201
UNICODE_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 202
UNICODE_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 203
UNICODE_MAHJONG_TILES = 204
UNICODE_DOMINO_TILES = 205
UNICODE_PLAYING_CARDS = 206
UNICODE_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 207
UNICODE_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 208
UNICODE_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 209
UNICODE_EMOTICONS = 210
UNICODE_TRANSPORT_AND_MAP_SYMBOLS = 211
UNICODE_ALCHEMICAL_SYMBOLS = 212
UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 213
UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 214
UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 215
UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 216
UNICODE_TAGS = 217
UNICODE_VARIATION_SELECTORS_SUPPLEMENT = 218
UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 219
UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 220

# Range table consumed by `unicode_block()`.
# Each entry is (block id, first code point, last code point) with both
# bounds inclusive.  Entries are listed in ascending code-point order and do
# not overlap, which is what lets the lookup binary-search the list.  Code
# points that fall between two consecutive entries belong to no block here.
_unicode_blocks = [
    (UNICODE_BASIC_LATIN, 0x0000, 0x007F),
    (UNICODE_LATIN_1_SUPPLEMENT, 0x0080, 0x00FF),
    (UNICODE_LATIN_EXTENDED_A, 0x0100, 0x017F),
    (UNICODE_LATIN_EXTENDED_B, 0x0180, 0x024F),
    (UNICODE_IPA_EXTENSIONS, 0x0250, 0x02AF),
    (UNICODE_SPACING_MODIFIER_LETTERS, 0x02B0, 0x02FF),
    (UNICODE_COMBINING_DIACRITICAL_MARKS, 0x0300, 0x036F),
    (UNICODE_GREEK_AND_COPTIC, 0x0370, 0x03FF),
    (UNICODE_CYRILLIC, 0x0400, 0x04FF),
    (UNICODE_CYRILLIC_SUPPLEMENT, 0x0500, 0x052F),
    (UNICODE_ARMENIAN, 0x0530, 0x058F),
    (UNICODE_HEBREW, 0x0590, 0x05FF),
    (UNICODE_ARABIC, 0x0600, 0x06FF),
    (UNICODE_SYRIAC, 0x0700, 0x074F),
    (UNICODE_ARABIC_SUPPLEMENT, 0x0750, 0x077F),
    (UNICODE_THAANA, 0x0780, 0x07BF),
    (UNICODE_NKO, 0x07C0, 0x07FF),
    (UNICODE_SAMARITAN, 0x0800, 0x083F),
    (UNICODE_MANDAIC, 0x0840, 0x085F),
    (UNICODE_ARABIC_EXTENDED_A, 0x08A0, 0x08FF),
    (UNICODE_DEVANAGARI, 0x0900, 0x097F),
    (UNICODE_BENGALI, 0x0980, 0x09FF),
    (UNICODE_GURMUKHI, 0x0A00, 0x0A7F),
    (UNICODE_GUJARATI, 0x0A80, 0x0AFF),
    (UNICODE_ORIYA, 0x0B00, 0x0B7F),
    (UNICODE_TAMIL, 0x0B80, 0x0BFF),
    (UNICODE_TELUGU, 0x0C00, 0x0C7F),
    (UNICODE_KANNADA, 0x0C80, 0x0CFF),
    (UNICODE_MALAYALAM, 0x0D00, 0x0D7F),
    (UNICODE_SINHALA, 0x0D80, 0x0DFF),
    (UNICODE_THAI, 0x0E00, 0x0E7F),
    (UNICODE_LAO, 0x0E80, 0x0EFF),
    (UNICODE_TIBETAN, 0x0F00, 0x0FFF),
    (UNICODE_MYANMAR, 0x1000, 0x109F),
    (UNICODE_GEORGIAN, 0x10A0, 0x10FF),
    (UNICODE_HANGUL_JAMO, 0x1100, 0x11FF),
    (UNICODE_ETHIOPIC, 0x1200, 0x137F),
    (UNICODE_ETHIOPIC_SUPPLEMENT, 0x1380, 0x139F),
    (UNICODE_CHEROKEE, 0x13A0, 0x13FF),
    (UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 0x1400, 0x167F),
    (UNICODE_OGHAM, 0x1680, 0x169F),
    (UNICODE_RUNIC, 0x16A0, 0x16FF),
    (UNICODE_TAGALOG, 0x1700, 0x171F),
    (UNICODE_HANUNOO, 0x1720, 0x173F),
    (UNICODE_BUHID, 0x1740, 0x175F),
    (UNICODE_TAGBANWA, 0x1760, 0x177F),
    (UNICODE_KHMER, 0x1780, 0x17FF),
    (UNICODE_MONGOLIAN, 0x1800, 0x18AF),
    (UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 0x18B0, 0x18FF),
    (UNICODE_LIMBU, 0x1900, 0x194F),
    (UNICODE_TAI_LE, 0x1950, 0x197F),
    (UNICODE_NEW_TAI_LUE, 0x1980, 0x19DF),
    (UNICODE_KHMER_SYMBOLS, 0x19E0, 0x19FF),
    (UNICODE_BUGINESE, 0x1A00, 0x1A1F),
    (UNICODE_TAI_THAM, 0x1A20, 0x1AAF),
    (UNICODE_BALINESE, 0x1B00, 0x1B7F),
    (UNICODE_SUNDANESE, 0x1B80, 0x1BBF),
    (UNICODE_BATAK, 0x1BC0, 0x1BFF),
    (UNICODE_LEPCHA, 0x1C00, 0x1C4F),
    (UNICODE_OL_CHIKI, 0x1C50, 0x1C7F),
    (UNICODE_SUNDANESE_SUPPLEMENT, 0x1CC0, 0x1CCF),
    (UNICODE_VEDIC_EXTENSIONS, 0x1CD0, 0x1CFF),
    (UNICODE_PHONETIC_EXTENSIONS, 0x1D00, 0x1D7F),
    (UNICODE_PHONETIC_EXTENSIONS_SUPPLEMENT, 0x1D80, 0x1DBF),
    (UNICODE_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 0x1DC0, 0x1DFF),
    (UNICODE_LATIN_EXTENDED_ADDITIONAL, 0x1E00, 0x1EFF),
    (UNICODE_GREEK_EXTENDED, 0x1F00, 0x1FFF),
    (UNICODE_GENERAL_PUNCTUATION, 0x2000, 0x206F),
    (UNICODE_SUPERSCRIPTS_AND_SUBSCRIPTS, 0x2070, 0x209F),
    (UNICODE_CURRENCY_SYMBOLS, 0x20A0, 0x20CF),
    (UNICODE_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS, 0x20D0, 0x20FF),
    (UNICODE_LETTERLIKE_SYMBOLS, 0x2100, 0x214F),
    (UNICODE_NUMBER_FORMS, 0x2150, 0x218F),
    (UNICODE_ARROWS, 0x2190, 0x21FF),
    (UNICODE_MATHEMATICAL_OPERATORS, 0x2200, 0x22FF),
    (UNICODE_MISCELLANEOUS_TECHNICAL, 0x2300, 0x23FF),
    (UNICODE_CONTROL_PICTURES, 0x2400, 0x243F),
    (UNICODE_OPTICAL_CHARACTER_RECOGNITION, 0x2440, 0x245F),
    (UNICODE_ENCLOSED_ALPHANUMERICS, 0x2460, 0x24FF),
    (UNICODE_BOX_DRAWING, 0x2500, 0x257F),
    (UNICODE_BLOCK_ELEMENTS, 0x2580, 0x259F),
    (UNICODE_GEOMETRIC_SHAPES, 0x25A0, 0x25FF),
    (UNICODE_MISCELLANEOUS_SYMBOLS, 0x2600, 0x26FF),
    (UNICODE_DINGBATS, 0x2700, 0x27BF),
    (UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 0x27C0, 0x27EF),
    (UNICODE_SUPPLEMENTAL_ARROWS_A, 0x27F0, 0x27FF),
    (UNICODE_BRAILLE_PATTERNS, 0x2800, 0x28FF),
    (UNICODE_SUPPLEMENTAL_ARROWS_B, 0x2900, 0x297F),
    (UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 0x2980, 0x29FF),
    (UNICODE_SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 0x2A00, 0x2AFF),
    (UNICODE_MISCELLANEOUS_SYMBOLS_AND_ARROWS, 0x2B00, 0x2BFF),
    (UNICODE_GLAGOLITIC, 0x2C00, 0x2C5F),
    (UNICODE_LATIN_EXTENDED_C, 0x2C60, 0x2C7F),
    (UNICODE_COPTIC, 0x2C80, 0x2CFF),
    (UNICODE_GEORGIAN_SUPPLEMENT, 0x2D00, 0x2D2F),
    (UNICODE_TIFINAGH, 0x2D30, 0x2D7F),
    (UNICODE_ETHIOPIC_EXTENDED, 0x2D80, 0x2DDF),
    (UNICODE_CYRILLIC_EXTENDED_A, 0x2DE0, 0x2DFF),
    (UNICODE_SUPPLEMENTAL_PUNCTUATION, 0x2E00, 0x2E7F),
    (UNICODE_CJK_RADICALS_SUPPLEMENT, 0x2E80, 0x2EFF),
    (UNICODE_KANGXI_RADICALS, 0x2F00, 0x2FDF),
    (UNICODE_IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 0x2FF0, 0x2FFF),
    (UNICODE_CJK_SYMBOLS_AND_PUNCTUATION, 0x3000, 0x303F),
    (UNICODE_HIRAGANA, 0x3040, 0x309F),
    (UNICODE_KATAKANA, 0x30A0, 0x30FF),
    (UNICODE_BOPOMOFO, 0x3100, 0x312F),
    (UNICODE_HANGUL_COMPATIBILITY_JAMO, 0x3130, 0x318F),
    (UNICODE_KANBUN, 0x3190, 0x319F),
    (UNICODE_BOPOMOFO_EXTENDED, 0x31A0, 0x31BF),
    (UNICODE_CJK_STROKES, 0x31C0, 0x31EF),
    (UNICODE_KATAKANA_PHONETIC_EXTENSIONS, 0x31F0, 0x31FF),
    (UNICODE_ENCLOSED_CJK_LETTERS_AND_MONTHS, 0x3200, 0x32FF),
    (UNICODE_CJK_COMPATIBILITY, 0x3300, 0x33FF),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 0x3400, 0x4DBF),
    (UNICODE_YIJING_HEXAGRAM_SYMBOLS, 0x4DC0, 0x4DFF),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS, 0x4E00, 0x9FFF),
    (UNICODE_YI_SYLLABLES, 0xA000, 0xA48F),
    (UNICODE_YI_RADICALS, 0xA490, 0xA4CF),
    (UNICODE_LISU, 0xA4D0, 0xA4FF),
    (UNICODE_VAI, 0xA500, 0xA63F),
    (UNICODE_CYRILLIC_EXTENDED_B, 0xA640, 0xA69F),
    (UNICODE_BAMUM, 0xA6A0, 0xA6FF),
    (UNICODE_MODIFIER_TONE_LETTERS, 0xA700, 0xA71F),
    (UNICODE_LATIN_EXTENDED_D, 0xA720, 0xA7FF),
    (UNICODE_SYLOTI_NAGRI, 0xA800, 0xA82F),
    (UNICODE_COMMON_INDIC_NUMBER_FORMS, 0xA830, 0xA83F),
    (UNICODE_PHAGS_PA, 0xA840, 0xA87F),
    (UNICODE_SAURASHTRA, 0xA880, 0xA8DF),
    (UNICODE_DEVANAGARI_EXTENDED, 0xA8E0, 0xA8FF),
    (UNICODE_KAYAH_LI, 0xA900, 0xA92F),
    (UNICODE_REJANG, 0xA930, 0xA95F),
    (UNICODE_HANGUL_JAMO_EXTENDED_A, 0xA960, 0xA97F),
    (UNICODE_JAVANESE, 0xA980, 0xA9DF),
    (UNICODE_CHAM, 0xAA00, 0xAA5F),
    (UNICODE_MYANMAR_EXTENDED_A, 0xAA60, 0xAA7F),
    (UNICODE_TAI_VIET, 0xAA80, 0xAADF),
    (UNICODE_MEETEI_MAYEK_EXTENSIONS, 0xAAE0, 0xAAFF),
    (UNICODE_ETHIOPIC_EXTENDED_A, 0xAB00, 0xAB2F),
    (UNICODE_MEETEI_MAYEK, 0xABC0, 0xABFF),
    (UNICODE_HANGUL_SYLLABLES, 0xAC00, 0xD7AF),
    (UNICODE_HANGUL_JAMO_EXTENDED_B, 0xD7B0, 0xD7FF),
    (UNICODE_HIGH_SURROGATES, 0xD800, 0xDB7F),
    (UNICODE_HIGH_PRIVATE_USE_SURROGATES, 0xDB80, 0xDBFF),
    (UNICODE_LOW_SURROGATES, 0xDC00, 0xDFFF),
    (UNICODE_PRIVATE_USE_AREA, 0xE000, 0xF8FF),
    (UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS, 0xF900, 0xFAFF),
    (UNICODE_ALPHABETIC_PRESENTATION_FORMS, 0xFB00, 0xFB4F),
    (UNICODE_ARABIC_PRESENTATION_FORMS_A, 0xFB50, 0xFDFF),
    (UNICODE_VARIATION_SELECTORS, 0xFE00, 0xFE0F),
    (UNICODE_VERTICAL_FORMS, 0xFE10, 0xFE1F),
    (UNICODE_COMBINING_HALF_MARKS, 0xFE20, 0xFE2F),
    (UNICODE_CJK_COMPATIBILITY_FORMS, 0xFE30, 0xFE4F),
    (UNICODE_SMALL_FORM_VARIANTS, 0xFE50, 0xFE6F),
    (UNICODE_ARABIC_PRESENTATION_FORMS_B, 0xFE70, 0xFEFF),
    (UNICODE_HALFWIDTH_AND_FULLWIDTH_FORMS, 0xFF00, 0xFFEF),
    (UNICODE_SPECIALS, 0xFFF0, 0xFFFF),
    (UNICODE_LINEAR_B_SYLLABARY, 0x10000, 0x1007F),
    (UNICODE_LINEAR_B_IDEOGRAMS, 0x10080, 0x100FF),
    (UNICODE_AEGEAN_NUMBERS, 0x10100, 0x1013F),
    (UNICODE_ANCIENT_GREEK_NUMBERS, 0x10140, 0x1018F),
    (UNICODE_ANCIENT_SYMBOLS, 0x10190, 0x101CF),
    (UNICODE_PHAISTOS_DISC, 0x101D0, 0x101FF),
    (UNICODE_LYCIAN, 0x10280, 0x1029F),
    (UNICODE_CARIAN, 0x102A0, 0x102DF),
    (UNICODE_OLD_ITALIC, 0x10300, 0x1032F),
    (UNICODE_GOTHIC, 0x10330, 0x1034F),
    (UNICODE_UGARITIC, 0x10380, 0x1039F),
    (UNICODE_OLD_PERSIAN, 0x103A0, 0x103DF),
    (UNICODE_DESERET, 0x10400, 0x1044F),
    (UNICODE_SHAVIAN, 0x10450, 0x1047F),
    (UNICODE_OSMANYA, 0x10480, 0x104AF),
    (UNICODE_CYPRIOT_SYLLABARY, 0x10800, 0x1083F),
    (UNICODE_IMPERIAL_ARAMAIC, 0x10840, 0x1085F),
    (UNICODE_PHOENICIAN, 0x10900, 0x1091F),
    (UNICODE_LYDIAN, 0x10920, 0x1093F),
    (UNICODE_MEROITIC_HIEROGLYPHS, 0x10980, 0x1099F),
    (UNICODE_MEROITIC_CURSIVE, 0x109A0, 0x109FF),
    (UNICODE_KHAROSHTHI, 0x10A00, 0x10A5F),
    (UNICODE_OLD_SOUTH_ARABIAN, 0x10A60, 0x10A7F),
    (UNICODE_AVESTAN, 0x10B00, 0x10B3F),
    (UNICODE_INSCRIPTIONAL_PARTHIAN, 0x10B40, 0x10B5F),
    (UNICODE_INSCRIPTIONAL_PAHLAVI, 0x10B60, 0x10B7F),
    (UNICODE_OLD_TURKIC, 0x10C00, 0x10C4F),
    (UNICODE_RUMI_NUMERAL_SYMBOLS, 0x10E60, 0x10E7F),
    (UNICODE_BRAHMI, 0x11000, 0x1107F),
    (UNICODE_KAITHI, 0x11080, 0x110CF),
    (UNICODE_SORA_SOMPENG, 0x110D0, 0x110FF),
    (UNICODE_CHAKMA, 0x11100, 0x1114F),
    (UNICODE_SHARADA, 0x11180, 0x111DF),
    (UNICODE_TAKRI, 0x11680, 0x116CF),
    (UNICODE_CUNEIFORM, 0x12000, 0x123FF),
    (UNICODE_CUNEIFORM_NUMBERS_AND_PUNCTUATION, 0x12400, 0x1247F),
    (UNICODE_EGYPTIAN_HIEROGLYPHS, 0x13000, 0x1342F),
    (UNICODE_BAMUM_SUPPLEMENT, 0x16800, 0x16A3F),
    (UNICODE_MIAO, 0x16F00, 0x16F9F),
    (UNICODE_KANA_SUPPLEMENT, 0x1B000, 0x1B0FF),
    (UNICODE_BYZANTINE_MUSICAL_SYMBOLS, 0x1D000, 0x1D0FF),
    (UNICODE_MUSICAL_SYMBOLS, 0x1D100, 0x1D1FF),
    (UNICODE_ANCIENT_GREEK_MUSICAL_NOTATION, 0x1D200, 0x1D24F),
    (UNICODE_TAI_XUAN_JING_SYMBOLS, 0x1D300, 0x1D35F),
    (UNICODE_COUNTING_ROD_NUMERALS, 0x1D360, 0x1D37F),
    (UNICODE_MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 0x1D400, 0x1D7FF),
    (UNICODE_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 0x1EE00, 0x1EEFF),
    (UNICODE_MAHJONG_TILES, 0x1F000, 0x1F02F),
    (UNICODE_DOMINO_TILES, 0x1F030, 0x1F09F),
    (UNICODE_PLAYING_CARDS, 0x1F0A0, 0x1F0FF),
    (UNICODE_ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 0x1F100, 0x1F1FF),
    (UNICODE_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 0x1F200, 0x1F2FF),
    (UNICODE_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 0x1F300, 0x1F5FF),
    (UNICODE_EMOTICONS, 0x1F600, 0x1F64F),
    (UNICODE_TRANSPORT_AND_MAP_SYMBOLS, 0x1F680, 0x1F6FF),
    (UNICODE_ALCHEMICAL_SYMBOLS, 0x1F700, 0x1F77F),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 0x20000, 0x2A6DF),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 0x2A700, 0x2B73F),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 0x2B740, 0x2B81F),
    (UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 0x2F800, 0x2FA1F),
    (UNICODE_TAGS, 0xE0000, 0xE007F),
    (UNICODE_VARIATION_SELECTORS_SUPPLEMENT, 0xE0100, 0xE01EF),
    (UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_A, 0xF0000, 0xFFFFF),
    (UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_B, 0x100000, 0x10FFFF),
]

# Number of entries in the range table; used as the binary-search bound.
NUM_BLOCKS = len(_unicode_blocks)


def unicode_block(ch):
    '''Return the Unicode block identifier for ch, or None if ch has no block.

    ch is a single character; it is converted with ord(), so anything ord()
    rejects raises the same exception here.  The result is the integer id
    stored in the first field of the matching `_unicode_blocks` entry (one of
    the module's UNICODE_* constants), or None when the code point lies in a
    gap between the listed ranges.
    '''
    cp = ord(ch)
    # Special case basic latin: answer directly without searching the table.
    if cp <= 0x7F:
        return UNICODE_BASIC_LATIN
    # Binary search for the entry whose inclusive [start, end] range holds cp.
    lo, hi = 0, NUM_BLOCKS - 1
    while lo <= hi:
        mid = (lo + hi) >> 1
        block, start, end = _unicode_blocks[mid]
        if cp < start:
            hi = mid - 1
        elif cp > end:
            lo = mid + 1
        else:
            return block
    # Search space exhausted: cp is not covered by any range in the table.
    return None