Use ICU break iterator on recent Android versions

This commit is contained in:
M66B 2021-01-06 21:43:26 +01:00
parent 502357b4fd
commit c3bbd71469
1 changed file with 29 additions and 12 deletions

View File

@ -21,6 +21,7 @@ package eu.faircode.email;
import android.content.Context; import android.content.Context;
import android.content.SharedPreferences; import android.content.SharedPreferences;
import android.os.Build;
import android.text.TextUtils; import android.text.TextUtils;
import androidx.preference.PreferenceManager; import androidx.preference.PreferenceManager;
@ -32,7 +33,6 @@ import org.json.JSONObject;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
@ -175,10 +175,11 @@ public class MessageClassifier {
State state = new State(); State state = new State();
state.words.add(null); state.words.add(null);
BreakIterator boundary = BreakIterator.getWordInstance(); // TODO ICU if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
java.text.BreakIterator boundary = java.text.BreakIterator.getWordInstance();
boundary.setText(text); boundary.setText(text);
int start = boundary.first(); int start = boundary.first();
for (int end = boundary.next(); end != BreakIterator.DONE; end = boundary.next()) { for (int end = boundary.next(); end != java.text.BreakIterator.DONE; end = boundary.next()) {
String word = text.substring(start, end).toLowerCase(); String word = text.substring(start, end).toLowerCase();
if (word.length() > 1 && if (word.length() > 1 &&
!state.words.contains(word) && !state.words.contains(word) &&
@ -188,6 +189,22 @@ public class MessageClassifier {
} }
start = end; start = end;
} }
} else {
// The ICU break iterator can properly handle Chinese texts
android.icu.text.BreakIterator boundary = android.icu.text.BreakIterator.getWordInstance();
boundary.setText(text);
int start = boundary.first();
for (int end = boundary.next(); end != android.icu.text.BreakIterator.DONE; end = boundary.next()) {
String word = text.substring(start, end).toLowerCase();
if (word.length() > 1 &&
!state.words.contains(word) &&
!word.matches(".*\\d.*")) {
state.words.add(word);
process(account, currentClass, added, state);
}
start = end;
}
}
state.words.add(null); state.words.add(null);
process(account, currentClass, added, state); process(account, currentClass, added, state);