Guess UTF-16 LE without BOM

This commit is contained in:
M66B 2022-06-10 22:05:42 +02:00
parent 77a1e96736
commit 7beab967d4
2 changed files with 44 additions and 1 deletions

View File

@ -22,6 +22,8 @@ package eu.faircode.email;
import android.text.TextUtils;
import android.util.Pair;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
@ -130,6 +132,33 @@ public class CharsetHelper {
return true;
}
/**
 * Guesses whether the stream starts with UTF-16 little-endian text that has no byte order mark.
 * <p>
 * Up to 64 bytes are sampled from the current position and the stream is reset to that
 * position before returning. Zero bytes are counted separately for even and odd offsets of
 * the sample; many zero bytes at odd offsets combined with few at even offsets is taken as
 * a sign of UTF-16 LE.
 *
 * @param bis stream to inspect; it is marked, read and reset by this method
 * @return {@code null} when fewer than 32 bytes could be sampled or when the sample starts
 * with a UTF-16 byte order mark (no guess is made); otherwise {@code true} when the
 * sample looks like UTF-16 LE and {@code false} when it does not
 * @throws IOException when reading from or resetting the stream fails
 */
static Boolean isUTF16LE(BufferedInputStream bis) throws IOException {
    byte[] bytes = new byte[64];
    bis.mark(bytes.length);
    try {
        // A single read() may deliver fewer bytes than are available,
        // so keep reading until the sample is full or the stream ends
        int count = 0;
        while (count < bytes.length) {
            int n = bis.read(bytes, count, bytes.length - count);
            if (n <= 0)
                break;
            count += n;
        }

        // Too little data for a meaningful guess
        if (count < 32)
            return null;

        // A byte order mark (in either byte order) already identifies the encoding
        int s = ((bytes[0] & 0xff) << 8) | (bytes[1] & 0xff);
        boolean bom = (s == 0xfeff || s == 0xfffe);
        if (bom)
            return null;

        // Count zero bytes per offset parity
        int odd = 0;
        int even = 0;
        for (int i = 0; i < count; i++)
            if (bytes[i] == 0) {
                if (i % 2 == 0)
                    even++;
                else
                    odd++;
            }

        // count / 2 is the number of bytes per parity:
        // require less than 30 % zeros at even offsets and more than 70 % zeros at odd offsets
        return (even < 30 * count / 100 / 2 && odd > 70 * count / 100 / 2);
    } finally {
        // Always rewind, so the caller can read the content from the start
        bis.reset();
    }
}
static String utf8toW1252(String text) {
try {
Charset w1252 = Charset.forName("windows-1252");

View File

@ -3112,7 +3112,21 @@ public class MessageHelper {
String result;
try {
Object content = h.part.getContent();
Object content;
// Check for UTF-16 LE without BOM
String pcharset = h.contentType.getParameter("charset");
if ("utf-16".equalsIgnoreCase(pcharset) && override == null) {
String charset = pcharset;
BufferedInputStream bis = new BufferedInputStream(h.part.getDataHandler().getInputStream());
if (Boolean.TRUE.equals(CharsetHelper.isUTF16LE(bis))) {
charset = StandardCharsets.UTF_16LE.name();
Log.e("Charset " + pcharset + " -> " + charset);
}
content = Helper.readStream(bis, Charset.forName(charset));
} else
content = h.part.getContent();
Log.i("Content class=" + (content == null ? null : content.getClass().getName()));
if (content == null) {