1
0
Fork 0
mirror of https://github.com/morpheus65535/bazarr synced 2024-12-29 02:55:52 +00:00
bazarr/libs/guess_language/data/__init__.py

152 lines
7 KiB
Python

# Shift width paired with the BLOCKS table below.  Every slot of BLOCKS stands
# for one run of 2**BLOCK_RSHIFT consecutive values (presumably the importing
# package looks a character up as ``BLOCKS[ord(ch) >> BLOCK_RSHIFT]`` -- that
# lookup is not part of this module, so confirm against the caller).
BLOCK_RSHIFT = 4

# Each entry is (first slot, end slot (exclusive), label).  Slots not covered
# by any entry keep the value None.  Several distinct ranges deliberately
# share one label; the trailing comment names the range being folded in.
_LABELLED_SPANS = (
    (0x0, 0x8, 'Basic Latin'),
    (0x8, 0x10, 'Extended Latin'),  # Latin-1 Supplement
    (0x10, 0x18, 'Extended Latin'),  # Latin Extended-A
    (0x18, 0x25, 'Latin Extended-B'),
    (0x25, 0x2b, 'Extended Latin'),  # IPA Extensions
    (0x2b, 0x30, 'Spacing Modifier Letters'),
    (0x37, 0x40, 'Greek and Coptic'),
    (0x40, 0x50, 'Cyrillic'),
    (0x50, 0x53, 'Cyrillic Supplement'),
    (0x53, 0x59, 'Armenian'),
    (0x59, 0x60, 'Hebrew'),
    (0x60, 0x70, 'Arabic'),
    (0x70, 0x75, 'Syriac'),
    (0x75, 0x78, 'Arabic Supplement'),
    (0x78, 0x7c, 'Thaana'),
    (0x7c, 0x80, 'NKo'),
    (0x80, 0x84, 'Samaritan'),
    (0x84, 0x86, 'Mandaic'),
    (0x8a, 0x90, 'Arabic Extended-A'),
    (0x90, 0x98, 'Devanagari'),
    (0x98, 0xa0, 'Bengali'),
    (0xa0, 0xa8, 'Gurmukhi'),
    (0xa8, 0xb0, 'Gujarati'),
    (0xb0, 0xb8, 'Oriya'),
    (0xb8, 0xc0, 'Tamil'),
    (0xc0, 0xc8, 'Telugu'),
    (0xc8, 0xd0, 'Kannada'),
    (0xd0, 0xd8, 'Malayalam'),
    (0xd8, 0xe0, 'Sinhala'),
    (0xe0, 0xe8, 'Thai'),
    (0xe8, 0xf0, 'Lao'),
    (0xf0, 0x100, 'Tibetan'),
    (0x100, 0x10a, 'Myanmar'),
    (0x10a, 0x110, 'Georgian'),
    (0x110, 0x120, 'Hangul Jamo'),
    (0x120, 0x138, 'Ethiopic'),
    (0x138, 0x13a, 'Ethiopic Supplement'),
    (0x13a, 0x140, 'Cherokee'),
    (0x140, 0x168, 'Unified Canadian Aboriginal Syllabics'),
    (0x168, 0x16a, 'Ogham'),
    (0x16a, 0x170, 'Runic'),
    (0x170, 0x172, 'Tagalog'),
    (0x172, 0x174, 'Hanunoo'),
    (0x174, 0x176, 'Buhid'),
    (0x176, 0x178, 'Tagbanwa'),
    (0x178, 0x180, 'Khmer'),
    (0x180, 0x18b, 'Mongolian'),
    (0x18b, 0x190, 'Unified Canadian Aboriginal Syllabics Extended'),
    (0x190, 0x195, 'Limbu'),
    (0x195, 0x198, 'Tai Le'),
    (0x198, 0x19e, 'New Tai Lue'),
    (0x1a0, 0x1a2, 'Buginese'),
    (0x1a2, 0x1ab, 'Tai Tham'),
    (0x1b0, 0x1b8, 'Balinese'),
    (0x1b8, 0x1bc, 'Sundanese'),
    (0x1bc, 0x1c0, 'Batak'),
    (0x1c0, 0x1c5, 'Lepcha'),
    (0x1c5, 0x1c8, 'Ol Chiki'),
    (0x1cd, 0x1d0, 'Vedic Extensions'),
    (0x1d0, 0x1d8, 'Phonetic Extensions'),
    (0x1d8, 0x1dc, 'Phonetic Extensions Supplement'),
    (0x1e0, 0x1f0, 'Latin Extended Additional'),
    (0x1f0, 0x200, 'Greek Extended'),
    (0x207, 0x20a, 'Superscripts and Subscripts'),
    (0x210, 0x215, 'Letterlike Symbols'),
    (0x215, 0x219, 'Number Forms'),
    (0x2c0, 0x2c6, 'Glagolitic'),
    (0x2c6, 0x2c8, 'Latin Extended-C'),
    (0x2c8, 0x2d0, 'Coptic'),
    (0x2d0, 0x2d3, 'Georgian Supplement'),
    (0x2d3, 0x2d8, 'Tifinagh'),
    (0x2d8, 0x2de, 'Ethiopic Extended'),
    (0x2e0, 0x2e8, 'Supplemental Punctuation'),
    (0x300, 0x304, 'CJK Symbols and Punctuation'),
    (0x304, 0x30a, 'Kana'),  # Hiragana
    (0x30a, 0x310, 'Kana'),  # Katakana
    (0x310, 0x313, 'Bopomofo'),
    (0x313, 0x319, 'Hangul Compatibility Jamo'),
    (0x31a, 0x31c, 'Bopomofo Extended'),
    (0x31f, 0x320, 'Kana'),  # Katakana Phonetic Extensions
    (0x340, 0x4dc, 'CJK Unified Ideographs Extension A'),
    (0x4e0, 0xa00, 'CJK Unified Ideographs'),
    (0xa00, 0xa49, 'Yi Syllables'),
    (0xa4d, 0xa50, 'Lisu'),
    (0xa50, 0xa64, 'Vai'),
    (0xa64, 0xa6a, 'Cyrillic Extended-B'),
    (0xa6a, 0xa70, 'Bamum'),
    (0xa70, 0xa72, 'Modifier Tone Letters'),
    (0xa72, 0xa80, 'Latin Extended-D'),
    (0xa80, 0xa83, 'Syloti Nagri'),
    (0xa84, 0xa88, 'Phags-pa'),
    (0xa88, 0xa8e, 'Saurashtra'),
    (0xa8e, 0xa90, 'Devanagari Extended'),
    (0xa90, 0xa93, 'Kayah Li'),
    (0xa93, 0xa96, 'Rejang'),
    (0xa96, 0xa98, 'Hangul Jamo Extended-A'),
    (0xa98, 0xa9e, 'Javanese'),
    (0xaa0, 0xaa6, 'Cham'),
    (0xaa6, 0xaa8, 'Myanmar Extended-A'),
    (0xaa8, 0xaae, 'Tai Viet'),
    (0xaae, 0xab0, 'Meetei Mayek Extensions'),
    (0xab0, 0xab3, 'Ethiopic Extended-A'),
    (0xabc, 0xac0, 'Meetei Mayek'),
    (0xac0, 0xd7b, 'Hangul Syllables'),
    (0xd7b, 0xd80, 'Hangul Jamo Extended-B'),
    (0xf90, 0xfb0, 'CJK Compatibility Ideographs'),
    (0xfb0, 0xfb5, 'Alphabetic Presentation Forms'),
    (0xfb5, 0xfe0, 'Arabic Presentation Forms-A'),
    (0xfe7, 0xff0, 'Arabic Presentation Forms-B'),
    (0xff0, 0xfff, 'Halfwidth and Fullwidth Forms'),
    (0x1000, 0x1008, 'Linear B Syllabary'),
    (0x1008, 0x1010, 'Linear B Ideograms'),
    (0x1028, 0x102a, 'Lycian'),
    (0x102a, 0x102e, 'Carian'),
    (0x1030, 0x1033, 'Old Italic'),
    (0x1033, 0x1035, 'Gothic'),
    (0x1038, 0x103a, 'Ugaritic'),
    (0x103a, 0x103e, 'Old Persian'),
    (0x1040, 0x1045, 'Deseret'),
    (0x1045, 0x1048, 'Shavian'),
    (0x1048, 0x104b, 'Osmanya'),
    (0x1080, 0x1084, 'Cypriot Syllabary'),
    (0x1084, 0x1086, 'Imperial Aramaic'),
    (0x1090, 0x1092, 'Phoenician'),
    (0x1092, 0x1094, 'Lydian'),
    (0x1098, 0x109a, 'Meroitic Hieroglyphs'),
    (0x109a, 0x10a0, 'Meroitic Cursive'),
    (0x10a0, 0x10a6, 'Kharoshthi'),
    (0x10a6, 0x10a8, 'Old South Arabian'),
    (0x10b0, 0x10b4, 'Avestan'),
    (0x10b4, 0x10b6, 'Inscriptional Parthian'),
    (0x10b6, 0x10b8, 'Inscriptional Pahlavi'),
    (0x10c0, 0x10c5, 'Old Turkic'),
    (0x1100, 0x1108, 'Brahmi'),
    (0x1108, 0x110d, 'Kaithi'),
    (0x110d, 0x1110, 'Sora Sompeng'),
    (0x1110, 0x1115, 'Chakma'),
    (0x1118, 0x111e, 'Sharada'),
    (0x1168, 0x116d, 'Takri'),
    (0x1200, 0x1240, 'Cuneiform'),
    (0x1300, 0x1343, 'Egyptian Hieroglyphs'),
    (0x1680, 0x16a4, 'Bamum Supplement'),
    (0x16f0, 0x16fa, 'Miao'),
    (0x1b00, 0x1b10, 'Kana Supplement'),
    (0x1d40, 0x1d80, 'Mathematical Alphanumeric Symbols'),
    (0x1ee0, 0x1ef0, 'Arabic Mathematical Alphabetic Symbols'),
    (0x2000, 0x2a6e, 'CJK Unified Ideographs Extension B'),
    (0x2a70, 0x2b74, 'CJK Unified Ideographs Extension C'),
    (0x2b74, 0x2b82, 'CJK Unified Ideographs Extension D'),
    (0x2f80, 0x2fa2, 'CJK Compatibility Ideographs Supplement'),
)

# Start with every slot unlabelled, then paint each span with its label.  The
# replacement list always has exactly (end - first) items, so the table length
# never changes.
BLOCKS = [None] * 0x2fa2
for _first, _end, _label in _LABELLED_SPANS:
    BLOCKS[_first:_end] = [_label] * (_end - _first)

# Keep the module namespace limited to BLOCK_RSHIFT and BLOCKS.
del _first, _end, _label, _LABELLED_SPANS