EML: show detected charset

This commit is contained in:
M66B 2021-06-25 09:09:12 +02:00
parent aa7edaf99d
commit a56005b95d
2 changed files with 35 additions and 1 deletions

View File

@ -64,6 +64,8 @@ import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Enumeration;
@ -77,6 +79,7 @@ import javax.mail.Message;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.internet.ContentType;
import javax.mail.internet.MimeMessage;
public class ActivityEML extends ActivityBase {
@ -422,6 +425,35 @@ public class ActivityEML extends ActivityBase {
.append(size > 0 ? Helper.humanReadableByteCount(size) : "?")
.append('\n');
if (!part.isMimeType("multipart/*")) {
Object content = part.getContent();
if (content instanceof String) {
String text = (String) content;
Charset detected = CharsetHelper.detect(text);
String charset;
try {
ContentType ct = new ContentType(part.getContentType());
charset = ct.getParameter("charset");
} catch (Throwable ignored) {
charset = null;
}
if (charset == null)
charset = StandardCharsets.ISO_8859_1.name();
Charset cs = Charset.forName(charset);
boolean isUtf8 = CharsetHelper.isUTF8(text.getBytes(cs));
for (int i = 0; i < level; i++)
ssb.append(" ");
ssb.append("Detected: ")
.append(detected == null ? "?" : detected.toString())
.append(" isUTF8=").append(Boolean.toString(isUtf8))
.append('\n');
}
}
ssb.append('\n');
if (part.isMimeType("multipart/*")) {

View File

@ -48,11 +48,13 @@ public class CharsetHelper {
// Tests whether the octets behind a text are decodable as UTF-8.
// The text is encoded with ISO-8859-1 and the resulting bytes are
// handed to the byte[] overload, whose result is returned unchanged.
static boolean isUTF8(String text) {
    // ISO-8859-1 yields exactly one octet per character in the 0..255 range
    return isUTF8(text.getBytes(StandardCharsets.ISO_8859_1));
}
static boolean isUTF8(byte[] octets) {
CharsetDecoder utf8Decoder = StandardCharsets.UTF_8.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
try {
utf8Decoder.decode(ByteBuffer.wrap(octets));
return true;