mirror of https://github.com/M66B/FairEmail.git
Auto recognize ISO-2022-JP
This commit is contained in:
parent
b49208083e
commit
cd1556470b
|
@ -0,0 +1,118 @@
|
||||||
|
package eu.faircode.email;
|
||||||
|
|
||||||
|
/*
|
||||||
|
This file is part of FairEmail.
|
||||||
|
|
||||||
|
FairEmail is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
FairEmail is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with FairEmail. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
Copyright 2018-2020 by Marcel Bokhorst (M66B)
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
|
||||||
|
|
||||||
|
/**
 * Heuristics for guessing the real character set of text whose raw octets were
 * decoded as ISO-8859-1 (one char per octet), so that callers can re-decode it.
 */
class CharsetHelper {
    // Name used both for the support check and by callers when re-decoding
    private static final String ISO_2022_JP = "ISO-2022-JP";

    // ASCII escape control character that introduces an ISO-2022 escape sequence
    private static final int ESC = 0x1B;

    // Number of recognized escape sequences required before text counts as ISO-2022-JP
    private static final int MIN_ISO2022JP_ESCAPES = 3;

    /**
     * Checks whether the octets behind an ISO-8859-1 decoded string form well-formed UTF-8.
     * <p>
     * Validation is delegated to the platform UTF-8 decoder in strict mode, so everything
     * the decoder would replace with U+FFFD (5/6-byte forms, overlong encodings, encoded
     * surrogates, truncated sequences) is rejected here as well.
     *
     * @param text text in which each char stands for one raw octet
     * @return true if re-decoding the octets as UTF-8 is lossless; false otherwise,
     * including when the text contains chars above U+00FF (it is then not raw octets)
     */
    static boolean isUTF8(String text) {
        // Get extended ASCII characters
        int length = text.length();
        byte[] octets = new byte[length];
        for (int i = 0; i < length; i++) {
            char ch = text.charAt(i);
            // getBytes(ISO_8859_1) would silently turn this into '?'
            if (ch > 0xFF)
                return false;
            octets[i] = (byte) ch;
        }

        try {
            StandardCharsets.UTF_8.newDecoder()
                    .onMalformedInput(CodingErrorAction.REPORT)
                    .onUnmappableCharacter(CodingErrorAction.REPORT)
                    .decode(ByteBuffer.wrap(octets));
            return true;
        } catch (CharacterCodingException ex) {
            // Expected outcome for input that is not UTF-8
            return false;
        }
    }

    /**
     * Checks whether the text consists of printable ISO-8859-1 characters only.
     * <p>
     * Note: chars above U+00FF are replaced by '?' when converting to octets
     * and therefore do not cause a rejection.
     *
     * @param text text to check
     * @return false if any octet is below 32 or in the range 127..159, true otherwise
     */
    static boolean isISO8859(String text) {
        // https://en.wikipedia.org/wiki/ISO/IEC_8859-1
        for (byte octet : text.getBytes(StandardCharsets.ISO_8859_1)) {
            int c = octet & 0xFF;
            boolean c0 = (c < 32); // C0 control characters
            boolean delOrC1 = (c >= 127 && c < 160); // DEL and C1 control characters
            if (c0 || delOrC1)
                return false;
        }
        return true;
    }

    /**
     * Guesses whether 7-bit text is ISO-2022-JP by counting charset switching escape sequences.
     * <p>
     * Recognized sequences are ESC ( B, ESC ( J, ESC $ @ and ESC $ B.
     *
     * @param text text in which each char stands for one raw octet
     * @return true as soon as three sequences were seen without a preceding char above 0x7F
     * and the runtime supports the ISO-2022-JP charset; false otherwise
     */
    static boolean isISO2022JP(String text) {
        // https://en.wikipedia.org/wiki/ISO/IEC_2022
        // https://www.sljfaq.org/afaq/encodings.html#encodings-ISO-2022-JP

        int escapes = 0;
        boolean escaped = false; // previous char was ESC
        boolean parenthesis = false; // previous chars were ESC (
        boolean dollar = false; // previous chars were ESC $

        int length = text.length();
        for (int i = 0; i < length; i++) {
            char c = text.charAt(i);

            // ISO-2022-JP is a 7-bit encoding
            if (c > 0x7F)
                return false;

            if (escaped) {
                escaped = false;
                if (c == '(')
                    parenthesis = true;
                else if (c == '$')
                    dollar = true;
            } else if (parenthesis) {
                parenthesis = false;
                if (c == 'B' || c == 'J')
                    escapes++;
            } else if (dollar) {
                dollar = false;
                if (c == '@' || c == 'B')
                    escapes++;
            } else if (c == ESC)
                escaped = true;

            // Detection is pointless if the text cannot be decoded afterwards
            if (escapes >= MIN_ISO2022JP_ESCAPES)
                return Charset.isSupported(ISO_2022_JP);
        }

        return false;
    }
}
|
|
@ -961,52 +961,6 @@ public class Helper {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static boolean isUTF8(String text) {
|
|
||||||
// Get extended ASCII characters
|
|
||||||
byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
|
|
||||||
|
|
||||||
int bytes;
|
|
||||||
for (int i = 0; i < octets.length; i++) {
|
|
||||||
if ((octets[i] & 0b10000000) == 0b00000000)
|
|
||||||
bytes = 1;
|
|
||||||
else if ((octets[i] & 0b11100000) == 0b11000000)
|
|
||||||
bytes = 2;
|
|
||||||
else if ((octets[i] & 0b11110000) == 0b11100000)
|
|
||||||
bytes = 3;
|
|
||||||
else if ((octets[i] & 0b11111000) == 0b11110000)
|
|
||||||
bytes = 4;
|
|
||||||
else if ((octets[i] & 0b11111100) == 0b11111000)
|
|
||||||
bytes = 5;
|
|
||||||
else if ((octets[i] & 0b11111110) == 0b11111100)
|
|
||||||
bytes = 6;
|
|
||||||
else
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (i + bytes > octets.length)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
while (--bytes > 0)
|
|
||||||
if ((octets[++i] & 0b11000000) != 0b10000000)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static boolean isISO8859(String text) {
|
|
||||||
// https://en.wikipedia.org/wiki/ISO/IEC_8859-1
|
|
||||||
int c;
|
|
||||||
byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
|
|
||||||
for (byte b : octets) {
|
|
||||||
c = b & 0xFF;
|
|
||||||
if (c < 32)
|
|
||||||
return false;
|
|
||||||
if (c >= 127 && c < 160)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static boolean isSingleScript(String s) {
|
static boolean isSingleScript(String s) {
|
||||||
// https://en.wikipedia.org/wiki/IDN_homograph_attack
|
// https://en.wikipedia.org/wiki/IDN_homograph_attack
|
||||||
if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N)
|
if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N)
|
||||||
|
|
|
@ -1139,7 +1139,7 @@ public class MessageHelper {
|
||||||
if (header.trim().startsWith("=?"))
|
if (header.trim().startsWith("=?"))
|
||||||
return header;
|
return header;
|
||||||
|
|
||||||
if (Helper.isUTF8(header)) {
|
if (CharsetHelper.isUTF8(header)) {
|
||||||
Log.w("Converting " + name + " to UTF-8");
|
Log.w("Converting " + name + " to UTF-8");
|
||||||
return new String(header.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
|
return new String(header.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
|
||||||
} else {
|
} else {
|
||||||
|
@ -1730,8 +1730,10 @@ public class MessageHelper {
|
||||||
warnings.add(context.getString(R.string.title_no_charset, charset));
|
warnings.add(context.getString(R.string.title_no_charset, charset));
|
||||||
|
|
||||||
if (part.isMimeType("text/plain")) {
|
if (part.isMimeType("text/plain")) {
|
||||||
if ((TextUtils.isEmpty(charset) || charset.equalsIgnoreCase(StandardCharsets.US_ASCII.name())) &&
|
if (TextUtils.isEmpty(charset) && CharsetHelper.isISO2022JP(result))
|
||||||
Helper.isUTF8(result)) {
|
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), "ISO-2022-JP");
|
||||||
|
else if ((TextUtils.isEmpty(charset) || charset.equalsIgnoreCase(StandardCharsets.US_ASCII.name())) &&
|
||||||
|
CharsetHelper.isUTF8(result)) {
|
||||||
Log.i("Charset plain=UTF8");
|
Log.i("Charset plain=UTF8");
|
||||||
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
|
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
|
||||||
}
|
}
|
||||||
|
@ -1740,7 +1742,9 @@ public class MessageHelper {
|
||||||
result = HtmlHelper.flow(result);
|
result = HtmlHelper.flow(result);
|
||||||
result = "<div x-plain=\"true\">" + HtmlHelper.formatPre(result) + "</div>";
|
result = "<div x-plain=\"true\">" + HtmlHelper.formatPre(result) + "</div>";
|
||||||
} else if (part.isMimeType("text/html")) {
|
} else if (part.isMimeType("text/html")) {
|
||||||
if (TextUtils.isEmpty(charset)) {
|
if (TextUtils.isEmpty(charset) && CharsetHelper.isISO2022JP(result))
|
||||||
|
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), "ISO-2022-JP");
|
||||||
|
else if (TextUtils.isEmpty(charset)) {
|
||||||
// <meta charset="utf-8" />
|
// <meta charset="utf-8" />
|
||||||
// <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
// <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||||
String excerpt = result.substring(0, Math.min(MAX_META_EXCERPT, result.length()));
|
String excerpt = result.substring(0, Math.min(MAX_META_EXCERPT, result.length()));
|
||||||
|
@ -1760,7 +1764,7 @@ public class MessageHelper {
|
||||||
try {
|
try {
|
||||||
Log.i("Charset=" + meta);
|
Log.i("Charset=" + meta);
|
||||||
Charset c = Charset.forName(charset);
|
Charset c = Charset.forName(charset);
|
||||||
if (c.equals(StandardCharsets.UTF_8) && !Helper.isUTF8(result))
|
if (c.equals(StandardCharsets.UTF_8) && !CharsetHelper.isUTF8(result))
|
||||||
break;
|
break;
|
||||||
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), charset);
|
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), charset);
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Reference in New Issue