Fixed/simplified charset transformation

This commit is contained in:
M66B 2020-01-31 17:11:36 +01:00
parent 54d6b2b70b
commit 47a0b5e361
4 changed files with 89 additions and 118 deletions

View File

@ -45,7 +45,6 @@ import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -1252,37 +1251,11 @@ public class MessageHelper {
try { try {
ContentType ct = new ContentType(part.getContentType()); ContentType ct = new ContentType(part.getContentType());
String charset = ct.getParameter("charset"); String charset = ct.getParameter("charset");
if (charset == null) if (UnknownCharsetProvider.charsetForMime(charset) == null)
charset = StandardCharsets.ISO_8859_1.name();
else {
charset = charset.replace("\"", "");
charset = MimeUtility.javaCharset(charset);
boolean supported = false;
try {
supported = Charset.isSupported(charset);
} catch (IllegalCharsetNameException ex) {
Log.e(charset, ex);
}
if (!supported) {
// x-binaryenc
// UseInqueCodePage
// none
// unknown-8bit
// X-UNKNOWN
Log.e("Unsupported encoding charset=" + charset);
warnings.add(context.getString(R.string.title_no_charset, charset)); warnings.add(context.getString(R.string.title_no_charset, charset));
charset = StandardCharsets.ISO_8859_1.name();
}
}
result = new String(result.getBytes(Charset.forName(charset)));
} catch (ParseException ex) { } catch (ParseException ex) {
Log.w(ex); Log.e(ex);
warnings.add(Log.formatThrowable(ex, false));
} }
if (part == plain) if (part == plain)

View File

@ -0,0 +1,85 @@
package eu.faircode.email;
/*
This file is part of FairEmail.
FairEmail is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
FairEmail is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with FairEmail. If not, see <http://www.gnu.org/licenses/>.
Copyright 2018-2020 by Marcel Bokhorst (M66B)
*/
import android.text.TextUtils;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.charset.spi.CharsetProvider;
import java.util.Collections;
import java.util.Iterator;
import javax.mail.internet.MimeUtility;
/**
 * Charset provider that resolves charset names as they appear in MIME
 * headers, including malformed or non-standard ones.
 * <p>
 * {@link #charsetForName(String)} never returns {@code null} and never throws:
 * any name that cannot be resolved falls back to ISO-8859-1.
 */
public class UnknownCharsetProvider extends CharsetProvider {
    /**
     * This provider does not enumerate any charsets of its own.
     *
     * @return an empty iterator
     */
    @Override
    public Iterator<Charset> charsets() {
        return Collections.emptyIterator();
    }

    /**
     * Resolves a charset name, falling back to ISO-8859-1 when the name is
     * unknown or resolving it fails for any reason.
     *
     * @param name the charset name to resolve, may be {@code null}
     * @return the resolved charset, or ISO-8859-1 as a fallback; never {@code null}
     */
    @Override
    public Charset charsetForName(String name) {
        try {
            Charset charset = charsetForMime(name);
            return (charset == null ? StandardCharsets.ISO_8859_1 : charset);
        } catch (Throwable ex) {
            // Deliberately best-effort: a provider lookup should not fail
            Log.e(ex);
            return StandardCharsets.ISO_8859_1;
        }
    }

    /**
     * Maps a MIME charset parameter value to a Java charset.
     * <p>
     * Double quotes are removed, surrounding whitespace is stripped and
     * everything after the first space is discarded before the lookup.
     * A missing or empty name and a few known aliases are mapped directly.
     *
     * @param name the raw charset parameter value, may be {@code null}
     * @return the matching charset, or {@code null} when the name is unknown
     * or not a legal charset name
     */
    public static Charset charsetForMime(String name) {
        // Examples of unusable charset names:
        // x-binaryenc
        // UseInqueCodePage
        // none
        // unknown-8bit
        // X-UNKNOWN
        // https://javaee.github.io/javamail/FAQ#unsupen
        // https://github.com/javaee/javamail/blob/master/mail/src/main/resources/META-INF/javamail.charset.map
        try {
            if (name == null)
                name = "";

            // Trim BEFORE searching for the separator: with leading whitespace
            // the first space would be at index 0 and trailing junk would
            // not be cut off, making a valid charset name unresolvable
            name = name.replace("\"", "").trim();

            int sp = name.indexOf(' ');
            if (sp > 0)
                name = name.substring(0, sp);

            // The cut-off name can still end with other whitespace (e.g. a tab)
            name = name.trim();

            if (TextUtils.isEmpty(name))
                return StandardCharsets.ISO_8859_1;

            if ("x-IA5".equalsIgnoreCase(name))
                return StandardCharsets.ISO_8859_1;

            if ("ASCII".equalsIgnoreCase(name))
                return StandardCharsets.ISO_8859_1;

            if ("ISO8859-16".equalsIgnoreCase(name))
                return StandardCharsets.ISO_8859_1;

            if ("UTF-8//TRANSLIT".equalsIgnoreCase(name))
                return StandardCharsets.UTF_8;

            // Charset.forName may consult this provider again
            // Android will prevent recursion
            String jname = MimeUtility.javaCharset(name);
            return Charset.forName(jname);
        } catch (Throwable ex) {
            Log.e("Unknown charset " + name, ex);
            return null;
        }
    }
}

View File

@ -1,88 +0,0 @@
### JDK-to-MIME charset mapping table ####
### This should be the first mapping table ###
### FairEmail
8859_16 ISO-8859-1
iso8859_16 ISO-8859-1
ISO8859-16 ISO-8859-1
ascii ISO-8859-1
ASCII ISO-8859-1
### Inherited
8859_1 ISO-8859-1
iso8859_1 ISO-8859-1
ISO8859-1 ISO-8859-1
8859_2 ISO-8859-2
iso8859_2 ISO-8859-2
ISO8859-2 ISO-8859-2
8859_3 ISO-8859-3
iso8859_3 ISO-8859-3
ISO8859-3 ISO-8859-3
8859_4 ISO-8859-4
iso8859_4 ISO-8859-4
ISO8859-4 ISO-8859-4
8859_5 ISO-8859-5
iso8859_5 ISO-8859-5
ISO8859-5 ISO-8859-5
8859_6 ISO-8859-6
iso8859_6 ISO-8859-6
ISO8859-6 ISO-8859-6
8859_7 ISO-8859-7
iso8859_7 ISO-8859-7
ISO8859-7 ISO-8859-7
8859_8 ISO-8859-8
iso8859_8 ISO-8859-8
ISO8859-8 ISO-8859-8
8859_9 ISO-8859-9
iso8859_9 ISO-8859-9
ISO8859-9 ISO-8859-9
SJIS Shift_JIS
JIS ISO-2022-JP
ISO2022JP ISO-2022-JP
EUC_JP euc-jp
KOI8_R koi8-r
EUC_CN euc-cn
EUC_TW euc-tw
EUC_KR euc-kr
--DIVIDER: this line *must* start with "--" and end with "--" --
#### XXX-to-JDK charset mapping table ####
iso-2022-cn ISO2022CN
iso-2022-kr ISO2022KR
utf-8 UTF8
utf8 UTF8
ja_jp.iso2022-7 ISO2022JP
ja_jp.eucjp EUCJIS
# these two are not needed in 1.1.6. (since EUC_KR exists
# and KSC5601 will map to the correct converter)
euc-kr KSC5601
euckr KSC5601
# in JDK 1.1.6 we will no longer need the "us-ascii" convert
us-ascii ISO-8859-1
x-us-ascii ISO-8859-1
# Chinese charsets are a mess and widely misrepresented.
# gb18030 is a superset of gbk, which is a superset of cp936/ms936,
# which is a superset of gb2312.
# https://bugzilla.gnome.org/show_bug.cgi?id=446783
# map all of these to gb18030.
gb2312 GB18030
cp936 GB18030
ms936 GB18030
gbk GB18030

View File

@ -0,0 +1 @@
eu.faircode.email.UnknownCharsetProvider