diff --git a/app/src/main/java/eu/faircode/email/CharsetHelper.java b/app/src/main/java/eu/faircode/email/CharsetHelper.java
new file mode 100644
index 0000000000..cba87dba16
--- /dev/null
+++ b/app/src/main/java/eu/faircode/email/CharsetHelper.java
@@ -0,0 +1,138 @@
+package eu.faircode.email;
+
+/*
+    This file is part of FairEmail.
+
+    FairEmail is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    FairEmail is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with FairEmail. If not, see <http://www.gnu.org/licenses/>.
+
+    Copyright 2018-2020 by Marcel Bokhorst (M66B)
+*/
+
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.StandardCharsets;
+
+class CharsetHelper {
+    /**
+     * Checks whether a string holding raw octets (one char per octet, as
+     * produced by decoding with ISO 8859-1) is well-formed UTF-8.
+     * <p>
+     * Validation follows RFC 3629: overlong forms, UTF-16 surrogates, code
+     * points above U+10FFFF and the obsolete 5 and 6 octet sequences are
+     * rejected, because decoding such data as UTF-8 would only yield
+     * replacement characters.
+     *
+     * @param text text in which each char stands for one octet
+     * @return true if the octets can be decoded as UTF-8 without errors;
+     * false if they cannot, or if a char is above 0xFF and so is not an octet
+     */
+    static boolean isUTF8(String text) {
+        // A char above 0xFF is already decoded text: getBytes() would replace
+        // it by '?' and re-decoding the octets would destroy it
+        for (int i = 0; i < text.length(); i++)
+            if (text.charAt(i) > 0xFF)
+                return false;
+
+        // Get extended ASCII characters
+        byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
+
+        try {
+            StandardCharsets.UTF_8.newDecoder()
+                    .onMalformedInput(CodingErrorAction.REPORT)
+                    .onUnmappableCharacter(CodingErrorAction.REPORT)
+                    .decode(ByteBuffer.wrap(octets));
+            return true;
+        } catch (CharacterCodingException ex) {
+            return false;
+        }
+    }
+
+    /**
+     * Checks whether a string contains only printable ISO 8859-1 characters.
+     * <p>
+     * Chars outside ISO 8859-1 are mapped to '?' by the encoder and therefore
+     * pass this check.
+     *
+     * @param text text to inspect
+     * @return false if any octet is below 32 (which includes tab, CR and LF)
+     * or in the range 127-159; true otherwise
+     */
+    static boolean isISO8859(String text) {
+        // https://en.wikipedia.org/wiki/ISO/IEC_8859-1
+        byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
+        for (byte b : octets) {
+            int c = b & 0xFF;
+            if (c < 32)
+                return false;
+            if (c >= 127 && c < 160)
+                return false;
+        }
+        return true;
+    }
+
+    /**
+     * Heuristically detects ISO-2022-JP text that was decoded as single octets.
+     * <p>
+     * The text is accepted once three of the escape sequences ESC ( B,
+     * ESC ( J, ESC $ @ or ESC $ B have been seen before any char above 0x7F.
+     *
+     * @param text text in which each char stands for one octet
+     * @return true if the text looks like ISO-2022-JP and this charset is
+     * supported by the runtime
+     */
+    static boolean isISO2022JP(String text) {
+        // https://en.wikipedia.org/wiki/ISO/IEC_2022
+        // https://www.sljfaq.org/afaq/encodings.html#encodings-ISO-2022-JP
+
+        if (!Charset.isSupported("ISO-2022-JP"))
+            return false;
+
+        int escapes = 0;
+        boolean escaped = false;
+        boolean parenthesis = false;
+        boolean dollar = false;
+        for (int i = 0; i < text.length(); i++) {
+            char c = text.charAt(i);
+
+            // ISO-2022-JP is a 7 bit encoding; this also rejects chars above
+            // 0xFF, which getBytes() would have turned into '?'
+            if (c > 0x7F)
+                return false;
+
+            if (escaped) {
+                escaped = false;
+                if (c == '(')
+                    parenthesis = true;
+                else if (c == '$')
+                    dollar = true;
+            } else if (parenthesis) {
+                parenthesis = false;
+                if (c == 'B' || c == 'J')
+                    escapes++;
+            } else if (dollar) {
+                dollar = false;
+                if (c == '@' || c == 'B')
+                    escapes++;
+            } else if (c == 0x1B)
+                escaped = true;
+
+            if (escapes >= 3)
+                return true;
+        }
+
+        return false;
+    }
+}
diff --git a/app/src/main/java/eu/faircode/email/Helper.java b/app/src/main/java/eu/faircode/email/Helper.java index 8c97b78315..bfc6ece7a9 100644 --- a/app/src/main/java/eu/faircode/email/Helper.java +++ b/app/src/main/java/eu/faircode/email/Helper.java @@ -961,52 +961,6 @@ public class Helper { return false; } - static boolean isUTF8(String text) { - // Get extended ASCII characters - byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1); - - int bytes; - for (int i = 0; i < octets.length; i++) { - if ((octets[i] & 0b10000000) == 0b00000000) - bytes = 1; - else if ((octets[i] & 0b11100000) == 0b11000000) - bytes = 2; - else if ((octets[i] & 0b11110000) == 0b11100000) - bytes = 3; - else if ((octets[i] & 0b11111000) == 0b11110000) - bytes = 4; - else if ((octets[i] & 0b11111100) == 0b11111000) - bytes = 5; - else if ((octets[i] & 0b11111110) == 
0b11111100) - bytes = 6; - else - return false; - - if (i + bytes > octets.length) - return false; - - while (--bytes > 0) - if ((octets[++i] & 0b11000000) != 0b10000000) - return false; - } - - return true; - } - - static boolean isISO8859(String text) { - // https://en.wikipedia.org/wiki/ISO/IEC_8859-1 - int c; - byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1); - for (byte b : octets) { - c = b & 0xFF; - if (c < 32) - return false; - if (c >= 127 && c < 160) - return false; - } - return true; - } - static boolean isSingleScript(String s) { // https://en.wikipedia.org/wiki/IDN_homograph_attack if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) diff --git a/app/src/main/java/eu/faircode/email/MessageHelper.java b/app/src/main/java/eu/faircode/email/MessageHelper.java index 7c071dd362..fae44ad1b8 100644 --- a/app/src/main/java/eu/faircode/email/MessageHelper.java +++ b/app/src/main/java/eu/faircode/email/MessageHelper.java @@ -1139,7 +1139,7 @@ public class MessageHelper { if (header.trim().startsWith("=?")) return header; - if (Helper.isUTF8(header)) { + if (CharsetHelper.isUTF8(header)) { Log.w("Converting " + name + " to UTF-8"); return new String(header.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8); } else { @@ -1730,8 +1730,10 @@ public class MessageHelper { warnings.add(context.getString(R.string.title_no_charset, charset)); if (part.isMimeType("text/plain")) { - if ((TextUtils.isEmpty(charset) || charset.equalsIgnoreCase(StandardCharsets.US_ASCII.name())) && - Helper.isUTF8(result)) { + if (TextUtils.isEmpty(charset) && CharsetHelper.isISO2022JP(result)) + result = new String(result.getBytes(StandardCharsets.ISO_8859_1), "ISO-2022-JP"); + else if ((TextUtils.isEmpty(charset) || charset.equalsIgnoreCase(StandardCharsets.US_ASCII.name())) && + CharsetHelper.isUTF8(result)) { Log.i("Charset plain=UTF8"); result = new String(result.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8); } @@ -1740,7 +1742,9 @@ public class 
MessageHelper { result = HtmlHelper.flow(result); result = "
" + HtmlHelper.formatPre(result) + "
"; } else if (part.isMimeType("text/html")) { - if (TextUtils.isEmpty(charset)) { + if (TextUtils.isEmpty(charset) && CharsetHelper.isISO2022JP(result)) + result = new String(result.getBytes(StandardCharsets.ISO_8859_1), "ISO-2022-JP"); + else if (TextUtils.isEmpty(charset)) { // // String excerpt = result.substring(0, Math.min(MAX_META_EXCERPT, result.length())); @@ -1760,7 +1764,7 @@ public class MessageHelper { try { Log.i("Charset=" + meta); Charset c = Charset.forName(charset); - if (c.equals(StandardCharsets.UTF_8) && !Helper.isUTF8(result)) + if (c.equals(StandardCharsets.UTF_8) && !CharsetHelper.isUTF8(result)) break; result = new String(result.getBytes(StandardCharsets.ISO_8859_1), charset); break;