Improved UTF8 check

This commit is contained in:
M66B 2020-10-25 12:38:41 +01:00
parent ec8317b1be
commit 10b4338e12
1 changed file with 13 additions and 24 deletions

View File

@ -21,7 +21,11 @@ package eu.faircode.email;
import android.text.TextUtils; import android.text.TextUtils;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
@ -45,32 +49,17 @@ class CharsetHelper {
// Get extended ASCII characters // Get extended ASCII characters
byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1); byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
int bytes; CharsetDecoder utf8Decoder = StandardCharsets.UTF_8.newDecoder()
for (int i = 0; i < octets.length; i++) { .onMalformedInput(CodingErrorAction.REPORT)
if ((octets[i] & 0b10000000) == 0b00000000) .onUnmappableCharacter(CodingErrorAction.REPORT);
bytes = 1;
else if ((octets[i] & 0b11100000) == 0b11000000)
bytes = 2;
else if ((octets[i] & 0b11110000) == 0b11100000)
bytes = 3;
else if ((octets[i] & 0b11111000) == 0b11110000)
bytes = 4;
else if ((octets[i] & 0b11111100) == 0b11111000)
bytes = 5;
else if ((octets[i] & 0b11111110) == 0b11111100)
bytes = 6;
else
return false;
if (i + bytes > octets.length) try {
return false; utf8Decoder.decode(ByteBuffer.wrap(octets));
return true;
while (--bytes > 0) } catch (CharacterCodingException ex) {
if ((octets[++i] & 0b11000000) != 0b10000000) Log.w(ex);
return false; return false;
} }
return true;
} }
static Charset detect(String text) { static Charset detect(String text) {