Consider ISO 8859-1 as no charset

This commit is contained in:
M66B 2023-07-18 18:16:37 +02:00
parent 901cb7bbea
commit abcf472476
1 changed file with 19 additions and 16 deletions

View File

@@ -3800,7 +3800,7 @@ public class MessageHelper {
Log.w(ex); Log.w(ex);
} }
if (cs == null) { if (cs == null || StandardCharsets.ISO_8859_1.equals(cs)) {
// <meta charset="utf-8" /> // <meta charset="utf-8" />
// <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> // <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
String excerpt = result.substring(0, Math.min(MAX_META_EXCERPT, result.length())); String excerpt = result.substring(0, Math.min(MAX_META_EXCERPT, result.length()));
@@ -3826,34 +3826,37 @@ public class MessageHelper {
if (StandardCharsets.US_ASCII.equals(c)) if (StandardCharsets.US_ASCII.equals(c))
break; break;
// Check if really UTF-8
if (StandardCharsets.UTF_8.equals(c) && !CharsetHelper.isUTF8(result)) {
Log.w("Charset meta=" + meta + " !isUTF8");
break;
}
// 16 bits charsets cannot be converted to 8 bits // 16 bits charsets cannot be converted to 8 bits
if (CHARSET16.contains(c)) { if (CHARSET16.contains(c)) {
Log.w("Charset meta=" + meta); Log.w("Charset meta=" + meta);
break; break;
} }
Charset detected = CharsetHelper.detect(result, c); // Check if really UTF-8
if (c.equals(detected)) if (StandardCharsets.UTF_8.equals(c) && !CharsetHelper.isUTF8(result)) {
Log.w("Charset meta=" + meta + " !isUTF8");
break; break;
}
// Check if same as detected charset
Charset detected = CharsetHelper.detect(result, c);
if (!c.equals(detected)) {
Log.w("Charset meta=" + meta + " !is" + detected);
break;
}
// Common detected/meta // Common detected/meta
// - windows-1250, windows-1257 / ISO-8859-1 // - windows-1250, windows-1257 / ISO-8859-1
// - ISO-8859-1 / windows-1252 // - ISO-8859-1 / windows-1252
// - US-ASCII / windows-1250, windows-1252, ISO-8859-1, ISO-8859-15, UTF-8 // - US-ASCII / windows-1250, windows-1252, ISO-8859-1, ISO-8859-15, UTF-8
if (StandardCharsets.US_ASCII.equals(detected) && //if (StandardCharsets.US_ASCII.equals(detected) &&
("ISO-8859-15".equals(c.name()) || // ("ISO-8859-15".equals(c.name()) ||
"windows-1250".equals(c.name()) || // "windows-1250".equals(c.name()) ||
"windows-1252".equals(c.name()) || // "windows-1252".equals(c.name()) ||
StandardCharsets.UTF_8.equals(c) || // StandardCharsets.UTF_8.equals(c) ||
StandardCharsets.ISO_8859_1.equals(c))) // StandardCharsets.ISO_8859_1.equals(c)))
break; // break;
// Convert // Convert
Log.w("Converting detected=" + detected + " meta=" + c); Log.w("Converting detected=" + detected + " meta=" + c);