mirror of
https://github.com/M66B/FairEmail.git
synced 2025-01-01 12:44:42 +00:00
Less common charsets
This commit is contained in:
parent
4dd3b606a7
commit
66293e7a74
1 changed file with 12 additions and 4 deletions
|
@ -38,10 +38,18 @@ public class CharsetHelper {
|
||||||
private static String CHINESE = new Locale("zh").getLanguage();
|
private static String CHINESE = new Locale("zh").getLanguage();
|
||||||
private static final List<String> COMMON = Collections.unmodifiableList(Arrays.asList(
|
private static final List<String> COMMON = Collections.unmodifiableList(Arrays.asList(
|
||||||
"US-ASCII",
|
"US-ASCII",
|
||||||
"ISO-8859-1", "ISO-8859-2",
|
"ISO-8859-1", "ISO-8859-2", "ISO-8859-3", "ISO-8859-7",
|
||||||
"windows-1250", "windows-1252", "windows-1257",
|
"windows-1250", "windows-1251", "windows-1252", "windows-1257",
|
||||||
"UTF-7", "UTF-8"
|
"UTF-7", "UTF-8"
|
||||||
));
|
));
|
||||||
|
private static final List<String> LESS_COMMON = Collections.unmodifiableList(Arrays.asList(
|
||||||
|
"GBK", "GB2312", "HZ-GB-2312",
|
||||||
|
"EUC", "EUC-KR",
|
||||||
|
"Big5", "BIG5-CP950",
|
||||||
|
"ISO-2022-JP", "Shift_JIS",
|
||||||
|
"cp852",
|
||||||
|
"x-binaryenc"
|
||||||
|
));
|
||||||
private static final int MIN_W1252 = 10;
|
private static final int MIN_W1252 = 10;
|
||||||
private static final Pair<byte[], byte[]>[] sUtf8W1252 = new Pair[128];
|
private static final Pair<byte[], byte[]>[] sUtf8W1252 = new Pair[128];
|
||||||
|
|
||||||
|
@ -182,7 +190,7 @@ public class CharsetHelper {
|
||||||
if (TextUtils.isEmpty(detected.charset)) {
|
if (TextUtils.isEmpty(detected.charset)) {
|
||||||
Log.e("compact_enc_det result=" + detected);
|
Log.e("compact_enc_det result=" + detected);
|
||||||
return null;
|
return null;
|
||||||
} else if (COMMON.contains(detected.charset))
|
} else if (COMMON.contains(detected.charset) || LESS_COMMON.contains(detected.charset))
|
||||||
Log.w("compact_enc_det result=" + detected);
|
Log.w("compact_enc_det result=" + detected);
|
||||||
else if ("GB18030".equals(detected.charset)) {
|
else if ("GB18030".equals(detected.charset)) {
|
||||||
boolean chinese = Locale.getDefault().getLanguage().equals(CHINESE);
|
boolean chinese = Locale.getDefault().getLanguage().equals(CHINESE);
|
||||||
|
@ -190,7 +198,7 @@ public class CharsetHelper {
|
||||||
Log.e("compact_enc_det result=" + detected + " chinese=" + chinese);
|
Log.e("compact_enc_det result=" + detected + " chinese=" + chinese);
|
||||||
if (!chinese)
|
if (!chinese)
|
||||||
return null;
|
return null;
|
||||||
} else // GBK, Big5, ISO-2022-JP, HZ-GB-2312, GB2312, Shift_JIS, x-binaryenc, EUC-KR
|
} else
|
||||||
Log.e("compact_enc_det result=" + detected);
|
Log.e("compact_enc_det result=" + detected);
|
||||||
|
|
||||||
return Charset.forName(detected.charset);
|
return Charset.forName(detected.charset);
|
||||||
|
|
Loading…
Reference in a new issue