mirror of https://github.com/M66B/FairEmail.git
Classifier simplification
This commit is contained in:
parent
0d6ba41aab
commit
7860a3986f
|
@ -167,41 +167,30 @@ public class MessageClassifier {
|
||||||
}
|
}
|
||||||
|
|
||||||
State state = new State();
|
State state = new State();
|
||||||
state.words.add(null);
|
process(account, currentClass, added, null, state);
|
||||||
|
|
||||||
if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
|
if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
|
||||||
java.text.BreakIterator boundary = java.text.BreakIterator.getWordInstance();
|
java.text.BreakIterator boundary = java.text.BreakIterator.getWordInstance();
|
||||||
boundary.setText(text);
|
boundary.setText(text);
|
||||||
int start = boundary.first();
|
int start = boundary.first();
|
||||||
for (int end = boundary.next(); end != java.text.BreakIterator.DONE; end = boundary.next()) {
|
for (int end = boundary.next(); end != java.text.BreakIterator.DONE; end = boundary.next()) {
|
||||||
String word = text.substring(start, end).trim().toLowerCase();
|
String word = text.substring(start, end);
|
||||||
if (word.length() > 1 &&
|
process(account, currentClass, added, word, state);
|
||||||
!state.words.contains(word) &&
|
|
||||||
!word.matches(".*\\d.*")) {
|
|
||||||
state.words.add(word);
|
|
||||||
process(account, currentClass, added, state);
|
|
||||||
}
|
|
||||||
start = end;
|
start = end;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// The ICU break iterator can properly handle Chinese texts
|
// The ICU break iterator works better for Chinese texts
|
||||||
android.icu.text.BreakIterator boundary = android.icu.text.BreakIterator.getWordInstance();
|
android.icu.text.BreakIterator boundary = android.icu.text.BreakIterator.getWordInstance();
|
||||||
boundary.setText(text);
|
boundary.setText(text);
|
||||||
int start = boundary.first();
|
int start = boundary.first();
|
||||||
for (int end = boundary.next(); end != android.icu.text.BreakIterator.DONE; end = boundary.next()) {
|
for (int end = boundary.next(); end != android.icu.text.BreakIterator.DONE; end = boundary.next()) {
|
||||||
String word = text.substring(start, end).trim().toLowerCase();
|
String word = text.substring(start, end);
|
||||||
if (word.length() > 1 &&
|
process(account, currentClass, added, word, state);
|
||||||
!state.words.contains(word) &&
|
|
||||||
!word.matches(".*\\d.*")) {
|
|
||||||
state.words.add(word);
|
|
||||||
process(account, currentClass, added, state);
|
|
||||||
}
|
|
||||||
start = end;
|
start = end;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
state.words.add(null);
|
process(account, currentClass, added, null, state);
|
||||||
process(account, currentClass, added, state);
|
|
||||||
|
|
||||||
if (!added)
|
if (!added)
|
||||||
return null;
|
return null;
|
||||||
|
@ -253,7 +242,18 @@ public class MessageClassifier {
|
||||||
return classification;
|
return classification;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void process(long account, String currentClass, boolean added, State state) {
|
private static void process(long account, String currentClass, boolean added, String word, State state) {
|
||||||
|
if (word != null) {
|
||||||
|
word = word.trim().toLowerCase();
|
||||||
|
|
||||||
|
if (word.length() < 2 ||
|
||||||
|
state.words.contains(word) ||
|
||||||
|
word.matches(".*\\d.*"))
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
state.words.add(word);
|
||||||
|
|
||||||
if (state.words.size() < 3)
|
if (state.words.size() < 3)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue