import bisect

# Integer identifiers for Unicode blocks.  The values are sequential and follow
# the order of the entries in ``_unicode_blocks`` below (ascending code point).
# NOTE(review): the block list looks like the Unicode 6.1 Blocks.txt -- confirm
# before relying on coverage of newer blocks.
UNICODE_BASIC_LATIN = 1
UNICODE_LATIN_1_SUPPLEMENT = 2
UNICODE_LATIN_EXTENDED_A = 3
UNICODE_LATIN_EXTENDED_B = 4
UNICODE_IPA_EXTENSIONS = 5
UNICODE_SPACING_MODIFIER_LETTERS = 6
UNICODE_COMBINING_DIACRITICAL_MARKS = 7
UNICODE_GREEK_AND_COPTIC = 8
UNICODE_CYRILLIC = 9
UNICODE_CYRILLIC_SUPPLEMENT = 10
UNICODE_ARMENIAN = 11
UNICODE_HEBREW = 12
UNICODE_ARABIC = 13
UNICODE_SYRIAC = 14
UNICODE_ARABIC_SUPPLEMENT = 15
UNICODE_THAANA = 16
UNICODE_NKO = 17
UNICODE_SAMARITAN = 18
UNICODE_MANDAIC = 19
UNICODE_ARABIC_EXTENDED_A = 20
UNICODE_DEVANAGARI = 21
UNICODE_BENGALI = 22
UNICODE_GURMUKHI = 23
UNICODE_GUJARATI = 24
UNICODE_ORIYA = 25
UNICODE_TAMIL = 26
UNICODE_TELUGU = 27
UNICODE_KANNADA = 28
UNICODE_MALAYALAM = 29
UNICODE_SINHALA = 30
UNICODE_THAI = 31
UNICODE_LAO = 32
UNICODE_TIBETAN = 33
UNICODE_MYANMAR = 34
UNICODE_GEORGIAN = 35
UNICODE_HANGUL_JAMO = 36
UNICODE_ETHIOPIC = 37
UNICODE_ETHIOPIC_SUPPLEMENT = 38
UNICODE_CHEROKEE = 39
UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 40
UNICODE_OGHAM = 41
UNICODE_RUNIC = 42
UNICODE_TAGALOG = 43
UNICODE_HANUNOO = 44
UNICODE_BUHID = 45
UNICODE_TAGBANWA = 46
UNICODE_KHMER = 47
UNICODE_MONGOLIAN = 48
UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 49
UNICODE_LIMBU = 50
UNICODE_TAI_LE = 51
UNICODE_NEW_TAI_LUE = 52
UNICODE_KHMER_SYMBOLS = 53
UNICODE_BUGINESE = 54
UNICODE_TAI_THAM = 55
UNICODE_BALINESE = 56
UNICODE_SUNDANESE = 57
UNICODE_BATAK = 58
UNICODE_LEPCHA = 59
UNICODE_OL_CHIKI = 60
UNICODE_SUNDANESE_SUPPLEMENT = 61
UNICODE_VEDIC_EXTENSIONS = 62
UNICODE_PHONETIC_EXTENSIONS = 63
UNICODE_PHONETIC_EXTENSIONS_SUPPLEMENT = 64
UNICODE_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 65
UNICODE_LATIN_EXTENDED_ADDITIONAL = 66
UNICODE_GREEK_EXTENDED = 67
UNICODE_GENERAL_PUNCTUATION = 68
UNICODE_SUPERSCRIPTS_AND_SUBSCRIPTS = 69
UNICODE_CURRENCY_SYMBOLS = 70
UNICODE_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS = 71
UNICODE_LETTERLIKE_SYMBOLS = 72
UNICODE_NUMBER_FORMS = 73
UNICODE_ARROWS = 74
UNICODE_MATHEMATICAL_OPERATORS = 75
UNICODE_MISCELLANEOUS_TECHNICAL = 76
UNICODE_CONTROL_PICTURES = 77
UNICODE_OPTICAL_CHARACTER_RECOGNITION = 78
UNICODE_ENCLOSED_ALPHANUMERICS = 79
UNICODE_BOX_DRAWING = 80
UNICODE_BLOCK_ELEMENTS = 81
UNICODE_GEOMETRIC_SHAPES = 82
UNICODE_MISCELLANEOUS_SYMBOLS = 83
UNICODE_DINGBATS = 84
UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 85
UNICODE_SUPPLEMENTAL_ARROWS_A = 86
UNICODE_BRAILLE_PATTERNS = 87
UNICODE_SUPPLEMENTAL_ARROWS_B = 88
UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 89
UNICODE_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 90
UNICODE_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 91
UNICODE_GLAGOLITIC = 92
UNICODE_LATIN_EXTENDED_C = 93
UNICODE_COPTIC = 94
UNICODE_GEORGIAN_SUPPLEMENT = 95
UNICODE_TIFINAGH = 96
UNICODE_ETHIOPIC_EXTENDED = 97
UNICODE_CYRILLIC_EXTENDED_A = 98
UNICODE_SUPPLEMENTAL_PUNCTUATION = 99
UNICODE_CJK_RADICALS_SUPPLEMENT = 100
UNICODE_KANGXI_RADICALS = 101
UNICODE_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 102
UNICODE_CJK_SYMBOLS_AND_PUNCTUATION = 103
UNICODE_HIRAGANA = 104
UNICODE_KATAKANA = 105
UNICODE_BOPOMOFO = 106
UNICODE_HANGUL_COMPATIBILITY_JAMO = 107
UNICODE_KANBUN = 108
UNICODE_BOPOMOFO_EXTENDED = 109
UNICODE_CJK_STROKES = 110
UNICODE_KATAKANA_PHONETIC_EXTENSIONS = 111
UNICODE_ENCLOSED_CJK_LETTERS_AND_MONTHS = 112
UNICODE_CJK_COMPATIBILITY = 113
UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 114
UNICODE_YIJING_HEXAGRAM_SYMBOLS = 115
UNICODE_CJK_UNIFIED_IDEOGRAPHS = 116
UNICODE_YI_SYLLABLES = 117
UNICODE_YI_RADICALS = 118
UNICODE_LISU = 119
UNICODE_VAI = 120
UNICODE_CYRILLIC_EXTENDED_B = 121
UNICODE_BAMUM = 122
UNICODE_MODIFIER_TONE_LETTERS = 123
UNICODE_LATIN_EXTENDED_D = 124
UNICODE_SYLOTI_NAGRI = 125
UNICODE_COMMON_INDIC_NUMBER_FORMS = 126
UNICODE_PHAGS_PA = 127
UNICODE_SAURASHTRA = 128
UNICODE_DEVANAGARI_EXTENDED = 129
UNICODE_KAYAH_LI = 130
UNICODE_REJANG = 131
UNICODE_HANGUL_JAMO_EXTENDED_A = 132
UNICODE_JAVANESE = 133
UNICODE_CHAM = 134
UNICODE_MYANMAR_EXTENDED_A = 135
UNICODE_TAI_VIET = 136
UNICODE_MEETEI_MAYEK_EXTENSIONS = 137
UNICODE_ETHIOPIC_EXTENDED_A = 138
UNICODE_MEETEI_MAYEK = 139
UNICODE_HANGUL_SYLLABLES = 140
UNICODE_HANGUL_JAMO_EXTENDED_B = 141
UNICODE_HIGH_SURROGATES = 142
UNICODE_HIGH_PRIVATE_USE_SURROGATES = 143
UNICODE_LOW_SURROGATES = 144
UNICODE_PRIVATE_USE_AREA = 145
UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS = 146
UNICODE_ALPHABETIC_PRESENTATION_FORMS = 147
UNICODE_ARABIC_PRESENTATION_FORMS_A = 148
UNICODE_VARIATION_SELECTORS = 149
UNICODE_VERTICAL_FORMS = 150
UNICODE_COMBINING_HALF_MARKS = 151
UNICODE_CJK_COMPATIBILITY_FORMS = 152
UNICODE_SMALL_FORM_VARIANTS = 153
UNICODE_ARABIC_PRESENTATION_FORMS_B = 154
UNICODE_HALFWIDTH_AND_FULLWIDTH_FORMS = 155
UNICODE_SPECIALS = 156
UNICODE_LINEAR_B_SYLLABARY = 157
UNICODE_LINEAR_B_IDEOGRAMS = 158
UNICODE_AEGEAN_NUMBERS = 159
UNICODE_ANCIENT_GREEK_NUMBERS = 160
UNICODE_ANCIENT_SYMBOLS = 161
UNICODE_PHAISTOS_DISC = 162
UNICODE_LYCIAN = 163
UNICODE_CARIAN = 164
UNICODE_OLD_ITALIC = 165
UNICODE_GOTHIC = 166
UNICODE_UGARITIC = 167
UNICODE_OLD_PERSIAN = 168
UNICODE_DESERET = 169
UNICODE_SHAVIAN = 170
UNICODE_OSMANYA = 171
UNICODE_CYPRIOT_SYLLABARY = 172
UNICODE_IMPERIAL_ARAMAIC = 173
UNICODE_PHOENICIAN = 174
UNICODE_LYDIAN = 175
UNICODE_MEROITIC_HIEROGLYPHS = 176
UNICODE_MEROITIC_CURSIVE = 177
UNICODE_KHAROSHTHI = 178
UNICODE_OLD_SOUTH_ARABIAN = 179
UNICODE_AVESTAN = 180
UNICODE_INSCRIPTIONAL_PARTHIAN = 181
UNICODE_INSCRIPTIONAL_PAHLAVI = 182
UNICODE_OLD_TURKIC = 183
UNICODE_RUMI_NUMERAL_SYMBOLS = 184
UNICODE_BRAHMI = 185
UNICODE_KAITHI = 186
UNICODE_SORA_SOMPENG = 187
UNICODE_CHAKMA = 188
UNICODE_SHARADA = 189
UNICODE_TAKRI = 190
UNICODE_CUNEIFORM = 191
UNICODE_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 192
UNICODE_EGYPTIAN_HIEROGLYPHS = 193
UNICODE_BAMUM_SUPPLEMENT = 194
UNICODE_MIAO = 195
UNICODE_KANA_SUPPLEMENT = 196
UNICODE_BYZANTINE_MUSICAL_SYMBOLS = 197
UNICODE_MUSICAL_SYMBOLS = 198
UNICODE_ANCIENT_GREEK_MUSICAL_NOTATION = 199
UNICODE_TAI_XUAN_JING_SYMBOLS = 200
UNICODE_COUNTING_ROD_NUMERALS = 201
UNICODE_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 202
UNICODE_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 203
UNICODE_MAHJONG_TILES = 204
UNICODE_DOMINO_TILES = 205
UNICODE_PLAYING_CARDS = 206
UNICODE_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 207
UNICODE_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 208
UNICODE_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 209
UNICODE_EMOTICONS = 210
UNICODE_TRANSPORT_AND_MAP_SYMBOLS = 211
UNICODE_ALCHEMICAL_SYMBOLS = 212
UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 213
UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 214
UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 215
UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 216
UNICODE_TAGS = 217
UNICODE_VARIATION_SELECTORS_SUPPLEMENT = 218
UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 219
UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 220

# (block id, first code point, last code point), both bounds inclusive.
# The entries are sorted by first code point and do not overlap; the lookup in
# unicode_block() depends on that ordering.  Code points that fall in the gaps
# between entries belong to no block.
_unicode_blocks = [
    (UNICODE_BASIC_LATIN, 0x0000, 0x007F),
    (UNICODE_LATIN_1_SUPPLEMENT, 0x0080, 0x00FF),
    (UNICODE_LATIN_EXTENDED_A, 0x0100, 0x017F),
    (UNICODE_LATIN_EXTENDED_B, 0x0180, 0x024F),
    (UNICODE_IPA_EXTENSIONS, 0x0250, 0x02AF),
    (UNICODE_SPACING_MODIFIER_LETTERS, 0x02B0, 0x02FF),
    (UNICODE_COMBINING_DIACRITICAL_MARKS, 0x0300, 0x036F),
    (UNICODE_GREEK_AND_COPTIC, 0x0370, 0x03FF),
    (UNICODE_CYRILLIC, 0x0400, 0x04FF),
    (UNICODE_CYRILLIC_SUPPLEMENT, 0x0500, 0x052F),
    (UNICODE_ARMENIAN, 0x0530, 0x058F),
    (UNICODE_HEBREW, 0x0590, 0x05FF),
    (UNICODE_ARABIC, 0x0600, 0x06FF),
    (UNICODE_SYRIAC, 0x0700, 0x074F),
    (UNICODE_ARABIC_SUPPLEMENT, 0x0750, 0x077F),
    (UNICODE_THAANA, 0x0780, 0x07BF),
    (UNICODE_NKO, 0x07C0, 0x07FF),
    (UNICODE_SAMARITAN, 0x0800, 0x083F),
    (UNICODE_MANDAIC, 0x0840, 0x085F),
    (UNICODE_ARABIC_EXTENDED_A, 0x08A0, 0x08FF),
    (UNICODE_DEVANAGARI, 0x0900, 0x097F),
    (UNICODE_BENGALI, 0x0980, 0x09FF),
    (UNICODE_GURMUKHI, 0x0A00, 0x0A7F),
    (UNICODE_GUJARATI, 0x0A80, 0x0AFF),
    (UNICODE_ORIYA, 0x0B00, 0x0B7F),
    (UNICODE_TAMIL, 0x0B80, 0x0BFF),
    (UNICODE_TELUGU, 0x0C00, 0x0C7F),
    (UNICODE_KANNADA, 0x0C80, 0x0CFF),
    (UNICODE_MALAYALAM, 0x0D00, 0x0D7F),
    (UNICODE_SINHALA, 0x0D80, 0x0DFF),
    (UNICODE_THAI, 0x0E00, 0x0E7F),
    (UNICODE_LAO, 0x0E80, 0x0EFF),
    (UNICODE_TIBETAN, 0x0F00, 0x0FFF),
    (UNICODE_MYANMAR, 0x1000, 0x109F),
    (UNICODE_GEORGIAN, 0x10A0, 0x10FF),
    (UNICODE_HANGUL_JAMO, 0x1100, 0x11FF),
    (UNICODE_ETHIOPIC, 0x1200, 0x137F),
    (UNICODE_ETHIOPIC_SUPPLEMENT, 0x1380, 0x139F),
    (UNICODE_CHEROKEE, 0x13A0, 0x13FF),
    (UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 0x1400, 0x167F),
    (UNICODE_OGHAM, 0x1680, 0x169F),
    (UNICODE_RUNIC, 0x16A0, 0x16FF),
    (UNICODE_TAGALOG, 0x1700, 0x171F),
    (UNICODE_HANUNOO, 0x1720, 0x173F),
    (UNICODE_BUHID, 0x1740, 0x175F),
    (UNICODE_TAGBANWA, 0x1760, 0x177F),
    (UNICODE_KHMER, 0x1780, 0x17FF),
    (UNICODE_MONGOLIAN, 0x1800, 0x18AF),
    (UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 0x18B0, 0x18FF),
    (UNICODE_LIMBU, 0x1900, 0x194F),
    (UNICODE_TAI_LE, 0x1950, 0x197F),
    (UNICODE_NEW_TAI_LUE, 0x1980, 0x19DF),
    (UNICODE_KHMER_SYMBOLS, 0x19E0, 0x19FF),
    (UNICODE_BUGINESE, 0x1A00, 0x1A1F),
    (UNICODE_TAI_THAM, 0x1A20, 0x1AAF),
    (UNICODE_BALINESE, 0x1B00, 0x1B7F),
    (UNICODE_SUNDANESE, 0x1B80, 0x1BBF),
    (UNICODE_BATAK, 0x1BC0, 0x1BFF),
    (UNICODE_LEPCHA, 0x1C00, 0x1C4F),
    (UNICODE_OL_CHIKI, 0x1C50, 0x1C7F),
    (UNICODE_SUNDANESE_SUPPLEMENT, 0x1CC0, 0x1CCF),
    (UNICODE_VEDIC_EXTENSIONS, 0x1CD0, 0x1CFF),
    (UNICODE_PHONETIC_EXTENSIONS, 0x1D00, 0x1D7F),
    (UNICODE_PHONETIC_EXTENSIONS_SUPPLEMENT, 0x1D80, 0x1DBF),
    (UNICODE_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 0x1DC0, 0x1DFF),
    (UNICODE_LATIN_EXTENDED_ADDITIONAL, 0x1E00, 0x1EFF),
    (UNICODE_GREEK_EXTENDED, 0x1F00, 0x1FFF),
    (UNICODE_GENERAL_PUNCTUATION, 0x2000, 0x206F),
    (UNICODE_SUPERSCRIPTS_AND_SUBSCRIPTS, 0x2070, 0x209F),
    (UNICODE_CURRENCY_SYMBOLS, 0x20A0, 0x20CF),
    (UNICODE_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS, 0x20D0, 0x20FF),
    (UNICODE_LETTERLIKE_SYMBOLS, 0x2100, 0x214F),
    (UNICODE_NUMBER_FORMS, 0x2150, 0x218F),
    (UNICODE_ARROWS, 0x2190, 0x21FF),
    (UNICODE_MATHEMATICAL_OPERATORS, 0x2200, 0x22FF),
    (UNICODE_MISCELLANEOUS_TECHNICAL, 0x2300, 0x23FF),
    (UNICODE_CONTROL_PICTURES, 0x2400, 0x243F),
    (UNICODE_OPTICAL_CHARACTER_RECOGNITION, 0x2440, 0x245F),
    (UNICODE_ENCLOSED_ALPHANUMERICS, 0x2460, 0x24FF),
    (UNICODE_BOX_DRAWING, 0x2500, 0x257F),
    (UNICODE_BLOCK_ELEMENTS, 0x2580, 0x259F),
    (UNICODE_GEOMETRIC_SHAPES, 0x25A0, 0x25FF),
    (UNICODE_MISCELLANEOUS_SYMBOLS, 0x2600, 0x26FF),
    (UNICODE_DINGBATS, 0x2700, 0x27BF),
    (UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 0x27C0, 0x27EF),
    (UNICODE_SUPPLEMENTAL_ARROWS_A, 0x27F0, 0x27FF),
    (UNICODE_BRAILLE_PATTERNS, 0x2800, 0x28FF),
    (UNICODE_SUPPLEMENTAL_ARROWS_B, 0x2900, 0x297F),
    (UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 0x2980, 0x29FF),
    (UNICODE_SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 0x2A00, 0x2AFF),
    (UNICODE_MISCELLANEOUS_SYMBOLS_AND_ARROWS, 0x2B00, 0x2BFF),
    (UNICODE_GLAGOLITIC, 0x2C00, 0x2C5F),
    (UNICODE_LATIN_EXTENDED_C, 0x2C60, 0x2C7F),
    (UNICODE_COPTIC, 0x2C80, 0x2CFF),
    (UNICODE_GEORGIAN_SUPPLEMENT, 0x2D00, 0x2D2F),
    (UNICODE_TIFINAGH, 0x2D30, 0x2D7F),
    (UNICODE_ETHIOPIC_EXTENDED, 0x2D80, 0x2DDF),
    (UNICODE_CYRILLIC_EXTENDED_A, 0x2DE0, 0x2DFF),
    (UNICODE_SUPPLEMENTAL_PUNCTUATION, 0x2E00, 0x2E7F),
    (UNICODE_CJK_RADICALS_SUPPLEMENT, 0x2E80, 0x2EFF),
    (UNICODE_KANGXI_RADICALS, 0x2F00, 0x2FDF),
    (UNICODE_IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 0x2FF0, 0x2FFF),
    (UNICODE_CJK_SYMBOLS_AND_PUNCTUATION, 0x3000, 0x303F),
    (UNICODE_HIRAGANA, 0x3040, 0x309F),
    (UNICODE_KATAKANA, 0x30A0, 0x30FF),
    (UNICODE_BOPOMOFO, 0x3100, 0x312F),
    (UNICODE_HANGUL_COMPATIBILITY_JAMO, 0x3130, 0x318F),
    (UNICODE_KANBUN, 0x3190, 0x319F),
    (UNICODE_BOPOMOFO_EXTENDED, 0x31A0, 0x31BF),
    (UNICODE_CJK_STROKES, 0x31C0, 0x31EF),
    (UNICODE_KATAKANA_PHONETIC_EXTENSIONS, 0x31F0, 0x31FF),
    (UNICODE_ENCLOSED_CJK_LETTERS_AND_MONTHS, 0x3200, 0x32FF),
    (UNICODE_CJK_COMPATIBILITY, 0x3300, 0x33FF),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 0x3400, 0x4DBF),
    (UNICODE_YIJING_HEXAGRAM_SYMBOLS, 0x4DC0, 0x4DFF),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS, 0x4E00, 0x9FFF),
    (UNICODE_YI_SYLLABLES, 0xA000, 0xA48F),
    (UNICODE_YI_RADICALS, 0xA490, 0xA4CF),
    (UNICODE_LISU, 0xA4D0, 0xA4FF),
    (UNICODE_VAI, 0xA500, 0xA63F),
    (UNICODE_CYRILLIC_EXTENDED_B, 0xA640, 0xA69F),
    (UNICODE_BAMUM, 0xA6A0, 0xA6FF),
    (UNICODE_MODIFIER_TONE_LETTERS, 0xA700, 0xA71F),
    (UNICODE_LATIN_EXTENDED_D, 0xA720, 0xA7FF),
    (UNICODE_SYLOTI_NAGRI, 0xA800, 0xA82F),
    (UNICODE_COMMON_INDIC_NUMBER_FORMS, 0xA830, 0xA83F),
    (UNICODE_PHAGS_PA, 0xA840, 0xA87F),
    (UNICODE_SAURASHTRA, 0xA880, 0xA8DF),
    (UNICODE_DEVANAGARI_EXTENDED, 0xA8E0, 0xA8FF),
    (UNICODE_KAYAH_LI, 0xA900, 0xA92F),
    (UNICODE_REJANG, 0xA930, 0xA95F),
    (UNICODE_HANGUL_JAMO_EXTENDED_A, 0xA960, 0xA97F),
    (UNICODE_JAVANESE, 0xA980, 0xA9DF),
    (UNICODE_CHAM, 0xAA00, 0xAA5F),
    (UNICODE_MYANMAR_EXTENDED_A, 0xAA60, 0xAA7F),
    (UNICODE_TAI_VIET, 0xAA80, 0xAADF),
    (UNICODE_MEETEI_MAYEK_EXTENSIONS, 0xAAE0, 0xAAFF),
    (UNICODE_ETHIOPIC_EXTENDED_A, 0xAB00, 0xAB2F),
    (UNICODE_MEETEI_MAYEK, 0xABC0, 0xABFF),
    (UNICODE_HANGUL_SYLLABLES, 0xAC00, 0xD7AF),
    (UNICODE_HANGUL_JAMO_EXTENDED_B, 0xD7B0, 0xD7FF),
    (UNICODE_HIGH_SURROGATES, 0xD800, 0xDB7F),
    (UNICODE_HIGH_PRIVATE_USE_SURROGATES, 0xDB80, 0xDBFF),
    (UNICODE_LOW_SURROGATES, 0xDC00, 0xDFFF),
    (UNICODE_PRIVATE_USE_AREA, 0xE000, 0xF8FF),
    (UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS, 0xF900, 0xFAFF),
    (UNICODE_ALPHABETIC_PRESENTATION_FORMS, 0xFB00, 0xFB4F),
    (UNICODE_ARABIC_PRESENTATION_FORMS_A, 0xFB50, 0xFDFF),
    (UNICODE_VARIATION_SELECTORS, 0xFE00, 0xFE0F),
    (UNICODE_VERTICAL_FORMS, 0xFE10, 0xFE1F),
    (UNICODE_COMBINING_HALF_MARKS, 0xFE20, 0xFE2F),
    (UNICODE_CJK_COMPATIBILITY_FORMS, 0xFE30, 0xFE4F),
    (UNICODE_SMALL_FORM_VARIANTS, 0xFE50, 0xFE6F),
    (UNICODE_ARABIC_PRESENTATION_FORMS_B, 0xFE70, 0xFEFF),
    (UNICODE_HALFWIDTH_AND_FULLWIDTH_FORMS, 0xFF00, 0xFFEF),
    (UNICODE_SPECIALS, 0xFFF0, 0xFFFF),
    (UNICODE_LINEAR_B_SYLLABARY, 0x10000, 0x1007F),
    (UNICODE_LINEAR_B_IDEOGRAMS, 0x10080, 0x100FF),
    (UNICODE_AEGEAN_NUMBERS, 0x10100, 0x1013F),
    (UNICODE_ANCIENT_GREEK_NUMBERS, 0x10140, 0x1018F),
    (UNICODE_ANCIENT_SYMBOLS, 0x10190, 0x101CF),
    (UNICODE_PHAISTOS_DISC, 0x101D0, 0x101FF),
    (UNICODE_LYCIAN, 0x10280, 0x1029F),
    (UNICODE_CARIAN, 0x102A0, 0x102DF),
    (UNICODE_OLD_ITALIC, 0x10300, 0x1032F),
    (UNICODE_GOTHIC, 0x10330, 0x1034F),
    (UNICODE_UGARITIC, 0x10380, 0x1039F),
    (UNICODE_OLD_PERSIAN, 0x103A0, 0x103DF),
    (UNICODE_DESERET, 0x10400, 0x1044F),
    (UNICODE_SHAVIAN, 0x10450, 0x1047F),
    (UNICODE_OSMANYA, 0x10480, 0x104AF),
    (UNICODE_CYPRIOT_SYLLABARY, 0x10800, 0x1083F),
    (UNICODE_IMPERIAL_ARAMAIC, 0x10840, 0x1085F),
    (UNICODE_PHOENICIAN, 0x10900, 0x1091F),
    (UNICODE_LYDIAN, 0x10920, 0x1093F),
    (UNICODE_MEROITIC_HIEROGLYPHS, 0x10980, 0x1099F),
    (UNICODE_MEROITIC_CURSIVE, 0x109A0, 0x109FF),
    (UNICODE_KHAROSHTHI, 0x10A00, 0x10A5F),
    (UNICODE_OLD_SOUTH_ARABIAN, 0x10A60, 0x10A7F),
    (UNICODE_AVESTAN, 0x10B00, 0x10B3F),
    (UNICODE_INSCRIPTIONAL_PARTHIAN, 0x10B40, 0x10B5F),
    (UNICODE_INSCRIPTIONAL_PAHLAVI, 0x10B60, 0x10B7F),
    (UNICODE_OLD_TURKIC, 0x10C00, 0x10C4F),
    (UNICODE_RUMI_NUMERAL_SYMBOLS, 0x10E60, 0x10E7F),
    (UNICODE_BRAHMI, 0x11000, 0x1107F),
    (UNICODE_KAITHI, 0x11080, 0x110CF),
    (UNICODE_SORA_SOMPENG, 0x110D0, 0x110FF),
    (UNICODE_CHAKMA, 0x11100, 0x1114F),
    (UNICODE_SHARADA, 0x11180, 0x111DF),
    (UNICODE_TAKRI, 0x11680, 0x116CF),
    (UNICODE_CUNEIFORM, 0x12000, 0x123FF),
    (UNICODE_CUNEIFORM_NUMBERS_AND_PUNCTUATION, 0x12400, 0x1247F),
    (UNICODE_EGYPTIAN_HIEROGLYPHS, 0x13000, 0x1342F),
    (UNICODE_BAMUM_SUPPLEMENT, 0x16800, 0x16A3F),
    (UNICODE_MIAO, 0x16F00, 0x16F9F),
    (UNICODE_KANA_SUPPLEMENT, 0x1B000, 0x1B0FF),
    (UNICODE_BYZANTINE_MUSICAL_SYMBOLS, 0x1D000, 0x1D0FF),
    (UNICODE_MUSICAL_SYMBOLS, 0x1D100, 0x1D1FF),
    (UNICODE_ANCIENT_GREEK_MUSICAL_NOTATION, 0x1D200, 0x1D24F),
    (UNICODE_TAI_XUAN_JING_SYMBOLS, 0x1D300, 0x1D35F),
    (UNICODE_COUNTING_ROD_NUMERALS, 0x1D360, 0x1D37F),
    (UNICODE_MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 0x1D400, 0x1D7FF),
    (UNICODE_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 0x1EE00, 0x1EEFF),
    (UNICODE_MAHJONG_TILES, 0x1F000, 0x1F02F),
    (UNICODE_DOMINO_TILES, 0x1F030, 0x1F09F),
    (UNICODE_PLAYING_CARDS, 0x1F0A0, 0x1F0FF),
    (UNICODE_ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 0x1F100, 0x1F1FF),
    (UNICODE_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 0x1F200, 0x1F2FF),
    (UNICODE_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 0x1F300, 0x1F5FF),
    (UNICODE_EMOTICONS, 0x1F600, 0x1F64F),
    (UNICODE_TRANSPORT_AND_MAP_SYMBOLS, 0x1F680, 0x1F6FF),
    (UNICODE_ALCHEMICAL_SYMBOLS, 0x1F700, 0x1F77F),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 0x20000, 0x2A6DF),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 0x2A700, 0x2B73F),
    (UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 0x2B740, 0x2B81F),
    (UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 0x2F800, 0x2FA1F),
    (UNICODE_TAGS, 0xE0000, 0xE007F),
    (UNICODE_VARIATION_SELECTORS_SUPPLEMENT, 0xE0100, 0xE01EF),
    (UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_A, 0xF0000, 0xFFFFF),
    (UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_B, 0x100000, 0x10FFFF),
]

# Number of entries in the block table.
NUM_BLOCKS = len(_unicode_blocks)

# First code point of every block, in table order.  Built once so that
# unicode_block() can bisect over plain integers instead of tuples.
_unicode_block_starts = [_start for _block, _start, _end in _unicode_blocks]


def unicode_block(ch):
    '''Return the Unicode block identifier for a single character.

    ch is a one-character string; it is passed to ord(), so anything ord()
    rejects (for example a longer string) raises TypeError.

    Returns one of the integer UNICODE_* constants defined in this module, or
    None if the code point lies in a gap between the blocks listed in
    _unicode_blocks.
    '''
    cp = ord(ch)
    # special case basic latin: skip the search for the most common input
    if cp <= 0x7F:
        return UNICODE_BASIC_LATIN
    # Locate the last block whose first code point is <= cp.  The table starts
    # at 0x0000, so the index is never negative here.
    idx = bisect.bisect_right(_unicode_block_starts, cp) - 1
    block, _start, end = _unicode_blocks[idx]
    if cp <= end:
        return block
    # cp falls between the end of this block and the start of the next one.
    return None