diff --git a/app/src/main/java/eu/faircode/email/MessageHelper.java b/app/src/main/java/eu/faircode/email/MessageHelper.java
index 2ae57ffee6..fba43d424b 100644
--- a/app/src/main/java/eu/faircode/email/MessageHelper.java
+++ b/app/src/main/java/eu/faircode/email/MessageHelper.java
@@ -3109,21 +3109,35 @@ public class MessageHelper {
return null;
}
- String result;
+ // Check character set
+ String charset = h.contentType.getParameter("charset");
+ if (UnknownCharsetProvider.charsetForMime(charset) == null)
+ warnings.add(context.getString(R.string.title_no_charset, charset));
+ if (TextUtils.isEmpty(charset) ||
+ charset.equalsIgnoreCase(StandardCharsets.US_ASCII.name()))
+ charset = null;
+
+ Charset cs = null;
+ if (charset != null)
+ try {
+ cs = Charset.forName(charset);
+ } catch (UnsupportedCharsetException ignored) {
+ cs = null;
+ }
+
+ String result;
try {
Object content;
// Check for UTF-16 LE without BOM
- String pcharset = h.contentType.getParameter("charset");
- if ("utf-16".equalsIgnoreCase(pcharset) && override == null) {
- String charset = pcharset;
+ if (StandardCharsets.UTF_16.equals(cs) && override == null) {
BufferedInputStream bis = new BufferedInputStream(h.part.getDataHandler().getInputStream());
if (Boolean.TRUE.equals(CharsetHelper.isUTF16LE(bis))) {
- charset = StandardCharsets.UTF_16LE.name();
- Log.e("Charset " + pcharset + " -> " + charset);
+ Log.e("Charset " + cs + " -> UTF16LE");
+ cs = StandardCharsets.UTF_16LE;
}
- content = Helper.readStream(bis, Charset.forName(charset));
+ content = Helper.readStream(bis, cs);
} else
content = h.part.getContent();
@@ -3141,15 +3155,8 @@ public class MessageHelper {
// Typically com.sun.mail.util.QPDecoderStream
if (BuildConfig.DEBUG && false)
warnings.add(content.getClass().getName());
- Charset charset;
- try {
- String cs = h.contentType.getParameter("charset");
- charset = (cs == null ? StandardCharsets.ISO_8859_1 : Charset.forName(cs));
- } catch (Throwable ex) {
- Log.w(ex);
- charset = StandardCharsets.ISO_8859_1;
- }
- result = Helper.readStream((InputStream) content, charset);
+ result = Helper.readStream((InputStream) content,
+ cs == null ? StandardCharsets.ISO_8859_1 : cs);
} else {
Log.e(content.getClass().getName());
result = content.toString();
@@ -3169,24 +3176,9 @@ public class MessageHelper {
return null;
}
- // Check character set
- String charset = h.contentType.getParameter("charset");
- if (UnknownCharsetProvider.charsetForMime(charset) == null)
- warnings.add(context.getString(R.string.title_no_charset, charset));
-
- if ((TextUtils.isEmpty(charset) || charset.equalsIgnoreCase(StandardCharsets.US_ASCII.name())))
- charset = null;
-
- Charset cs = null;
- try {
- if (charset != null)
- cs = Charset.forName(charset);
- } catch (UnsupportedCharsetException ignored) {
- }
-
if (h.isPlainText()) {
if (override == null) {
- if (charset == null || StandardCharsets.ISO_8859_1.equals(cs)) {
+ if (cs == null || StandardCharsets.ISO_8859_1.equals(cs)) {
if (StandardCharsets.ISO_8859_1.equals(cs) && CharsetHelper.isUTF8(result)) {
Log.i("Charset upgrade=UTF8");
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
@@ -3254,11 +3246,11 @@ public class MessageHelper {
try {
if (CHARSET16.contains(cs)) {
Charset detected = CharsetHelper.detect(result, cs);
- // UTF-16 can be detected as US-ASCII
if (!CHARSET16.contains(detected))
Log.w(new Throwable("Charset=" + cs + " detected=" + detected));
+ // UTF-16 can be detected as US-ASCII
if (StandardCharsets.UTF_8.equals(detected)) {
- charset = null;
+ cs = null;
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), detected);
}
}
@@ -3266,26 +3258,27 @@ public class MessageHelper {
Log.w(ex);
}
- if (charset == null) {
+ if (cs == null) {
//
//
String excerpt = result.substring(0, Math.min(MAX_META_EXCERPT, result.length()));
Document d = JsoupEx.parse(excerpt);
for (Element meta : d.select("meta")) {
+ String mcharset = null;
if ("Content-Type".equalsIgnoreCase(meta.attr("http-equiv"))) {
try {
ContentType ct = new ContentType(meta.attr("content"));
- charset = ct.getParameter("charset");
+ mcharset = ct.getParameter("charset");
} catch (ParseException ex) {
Log.w(ex);
}
} else
- charset = meta.attr("charset");
+ mcharset = meta.attr("charset");
- if (!TextUtils.isEmpty(charset))
+ if (!TextUtils.isEmpty(mcharset))
try {
Log.i("Charset meta=" + meta);
- Charset c = Charset.forName(charset);
+ Charset c = Charset.forName(mcharset);
// US-ASCII is a subset of ISO8859-1
if (StandardCharsets.US_ASCII.equals(c))