diff --git a/app/src/main/java/eu/faircode/email/CharsetHelper.java b/app/src/main/java/eu/faircode/email/CharsetHelper.java
new file mode 100644
index 0000000000..cba87dba16
--- /dev/null
+++ b/app/src/main/java/eu/faircode/email/CharsetHelper.java
@@ -0,0 +1,118 @@
+package eu.faircode.email;
+
+/*
+ This file is part of FairEmail.
+
+ FairEmail is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ FairEmail is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with FairEmail. If not, see <http://www.gnu.org/licenses/>.
+
+ Copyright 2018-2020 by Marcel Bokhorst (M66B)
+*/
+
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.nio.charset.UnsupportedCharsetException;
+
+class CharsetHelper { // Heuristic charset checks; every method inspects the ISO-8859-1 octets of the given text
+ static boolean isUTF8(String text) { // true when the octets of text form structurally well-formed UTF-8 sequences
+ // One octet per character; presumably characters above U+00FF are replaced by '?' here - TODO confirm callers only pass ISO-8859-1 decoded text
+ byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
+
+ int bytes; // length of the current sequence, derived from its lead octet
+ for (int i = 0; i < octets.length; i++) {
+ if ((octets[i] & 0b10000000) == 0b00000000) // 0xxxxxxx
+ bytes = 1;
+ else if ((octets[i] & 0b11100000) == 0b11000000) // 110xxxxx
+ bytes = 2;
+ else if ((octets[i] & 0b11110000) == 0b11100000) // 1110xxxx
+ bytes = 3;
+ else if ((octets[i] & 0b11111000) == 0b11110000) // 11110xxx
+ bytes = 4;
+ else if ((octets[i] & 0b11111100) == 0b11111000) // 111110xx; NOTE(review): RFC 3629 no longer allows 5/6-octet forms - confirm accepting them is intended
+ bytes = 5;
+ else if ((octets[i] & 0b11111110) == 0b11111100) // 1111110x
+ bytes = 6;
+ else
+ return false; // lead position holds a continuation octet (10xxxxxx) or 0xFE/0xFF
+
+ if (i + bytes > octets.length)
+ return false; // sequence would run past the end of the input
+
+ while (--bytes > 0) // the remaining octets of the sequence ...
+ if ((octets[++i] & 0b11000000) != 0b10000000) // ... must each look like 10xxxxxx
+ return false;
+ }
+
+ return true; // also reached for empty input and for input with only 7-bit octets
+ }
+
+ static boolean isISO8859(String text) { // true when no octet lies in 0x00-0x1F or 0x7F-0x9F
+ // https://en.wikipedia.org/wiki/ISO/IEC_8859-1
+ int c; // current octet as an unsigned value (0..255)
+ byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
+ for (byte b : octets) {
+ c = b & 0xFF;
+ if (c < 32) // 0x00-0x1F; NOTE(review): this range includes tab, line feed and carriage return - confirm rejecting them is intended
+ return false;
+ if (c >= 127 && c < 160) // 0x7F-0x9F
+ return false;
+ }
+ return true; // also reached for empty input
+ }
+
+ static boolean isISO2022JP(String text) { // true as soon as three recognized escape sequences were seen in 7-bit input
+ // https://en.wikipedia.org/wiki/ISO/IEC_2022
+ // https://www.sljfaq.org/afaq/encodings.html#encodings-ISO-2022-JP
+
+ try {
+ Charset.forName("ISO-2022-JP"); // result is discarded; the call only probes whether the charset is available
+ } catch (UnsupportedCharsetException ex) {
+ return false; // charset not available in this runtime: report no match
+ }
+
+ int c; // current octet as an unsigned value (0..255)
+ int escapes = 0; // number of complete, recognized escape sequences so far
+ boolean escaped = false; // the previous octet was ESC
+ boolean parenthesis = false; // the previous two octets were ESC (
+ boolean dollar = false; // the previous two octets were ESC $
+ byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
+ for (byte b : octets) {
+ c = b & 0xFF;
+
+ if (c > 0x7F)
+ return false; // any octet with the high bit set rules the text out
+
+ if (escaped) {
+ escaped = false;
+ if (c == '(')
+ parenthesis = true;
+ else if (c == '$')
+ dollar = true;
+ } else if (parenthesis) {
+ parenthesis = false;
+ if (c == 'B' || c == 'J') // ESC ( B and ESC ( J; per RFC 1468: ASCII and JIS X 0201 Roman
+ escapes++;
+ } else if (dollar) {
+ dollar = false;
+ if (c == '@' || c == 'B') // ESC $ @ and ESC $ B; per RFC 1468: JIS X 0208-1978 and JIS X 0208-1983
+ escapes++;
+ } else if (c == 0x1B) // ESC; an octet consumed by one of the states above is not re-checked for ESC
+ escaped = true;
+
+ if (escapes >= 3) // NOTE(review): the threshold of three presumably limits false positives - confirm
+ return true;
+ }
+
+ return false; // fewer than three recognized escape sequences
+ }
+}
diff --git a/app/src/main/java/eu/faircode/email/Helper.java b/app/src/main/java/eu/faircode/email/Helper.java
index 8c97b78315..bfc6ece7a9 100644
--- a/app/src/main/java/eu/faircode/email/Helper.java
+++ b/app/src/main/java/eu/faircode/email/Helper.java
@@ -961,52 +961,6 @@ public class Helper {
return false;
}
- static boolean isUTF8(String text) {
- // Get extended ASCII characters
- byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
-
- int bytes;
- for (int i = 0; i < octets.length; i++) {
- if ((octets[i] & 0b10000000) == 0b00000000)
- bytes = 1;
- else if ((octets[i] & 0b11100000) == 0b11000000)
- bytes = 2;
- else if ((octets[i] & 0b11110000) == 0b11100000)
- bytes = 3;
- else if ((octets[i] & 0b11111000) == 0b11110000)
- bytes = 4;
- else if ((octets[i] & 0b11111100) == 0b11111000)
- bytes = 5;
- else if ((octets[i] & 0b11111110) == 0b11111100)
- bytes = 6;
- else
- return false;
-
- if (i + bytes > octets.length)
- return false;
-
- while (--bytes > 0)
- if ((octets[++i] & 0b11000000) != 0b10000000)
- return false;
- }
-
- return true;
- }
-
- static boolean isISO8859(String text) {
- // https://en.wikipedia.org/wiki/ISO/IEC_8859-1
- int c;
- byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
- for (byte b : octets) {
- c = b & 0xFF;
- if (c < 32)
- return false;
- if (c >= 127 && c < 160)
- return false;
- }
- return true;
- }
-
static boolean isSingleScript(String s) {
// https://en.wikipedia.org/wiki/IDN_homograph_attack
if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N)
diff --git a/app/src/main/java/eu/faircode/email/MessageHelper.java b/app/src/main/java/eu/faircode/email/MessageHelper.java
index 7c071dd362..fae44ad1b8 100644
--- a/app/src/main/java/eu/faircode/email/MessageHelper.java
+++ b/app/src/main/java/eu/faircode/email/MessageHelper.java
@@ -1139,7 +1139,7 @@ public class MessageHelper {
if (header.trim().startsWith("=?"))
return header;
- if (Helper.isUTF8(header)) {
+ if (CharsetHelper.isUTF8(header)) {
Log.w("Converting " + name + " to UTF-8");
return new String(header.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
} else {
@@ -1730,8 +1730,10 @@ public class MessageHelper {
warnings.add(context.getString(R.string.title_no_charset, charset));
if (part.isMimeType("text/plain")) {
- if ((TextUtils.isEmpty(charset) || charset.equalsIgnoreCase(StandardCharsets.US_ASCII.name())) &&
- Helper.isUTF8(result)) {
+ if (TextUtils.isEmpty(charset) && CharsetHelper.isISO2022JP(result))
+ result = new String(result.getBytes(StandardCharsets.ISO_8859_1), "ISO-2022-JP");
+ else if ((TextUtils.isEmpty(charset) || charset.equalsIgnoreCase(StandardCharsets.US_ASCII.name())) &&
+ CharsetHelper.isUTF8(result)) {
Log.i("Charset plain=UTF8");
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
}
@@ -1740,7 +1742,9 @@ public class MessageHelper {
result = HtmlHelper.flow(result);
result = "
" + HtmlHelper.formatPre(result) + "
";
} else if (part.isMimeType("text/html")) {
- if (TextUtils.isEmpty(charset)) {
+ if (TextUtils.isEmpty(charset) && CharsetHelper.isISO2022JP(result))
+ result = new String(result.getBytes(StandardCharsets.ISO_8859_1), "ISO-2022-JP");
+ else if (TextUtils.isEmpty(charset)) {
//
//
String excerpt = result.substring(0, Math.min(MAX_META_EXCERPT, result.length()));
@@ -1760,7 +1764,7 @@ public class MessageHelper {
try {
Log.i("Charset=" + meta);
Charset c = Charset.forName(charset);
- if (c.equals(StandardCharsets.UTF_8) && !Helper.isUTF8(result))
+ if (c.equals(StandardCharsets.UTF_8) && !CharsetHelper.isUTF8(result))
break;
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), charset);
break;