mirror of https://github.com/M66B/FairEmail.git
Use break iterator
This commit is contained in:
parent
aa279e5b57
commit
385a829cba
|
@@ -34,6 +34,7 @@ import androidx.work.WorkerParameters;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
import java.text.BreakIterator;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@@ -89,12 +90,15 @@ public class WorkerFts extends Worker {
|
||||||
EntityFolder folder = db.folder().getFolder(message.folder);
|
EntityFolder folder = db.folder().getFolder(message.folder);
|
||||||
if (folder != null) {
|
if (folder != null) {
|
||||||
List<String> features = new ArrayList<>();
|
List<String> features = new ArrayList<>();
|
||||||
for (String word : text.trim().toLowerCase().split("\\W+")) {
|
|
||||||
if (word.matches(".*\\d.*"))
|
BreakIterator boundary = BreakIterator.getWordInstance();
|
||||||
continue;
|
boundary.setText(text);
|
||||||
if (word.endsWith("."))
|
int start = boundary.first();
|
||||||
word = word.substring(0, word.length() - 1);
|
for (int end = boundary.next(); end != BreakIterator.DONE; end = boundary.next()) {
|
||||||
features.add(word);
|
String word = text.substring(start, end);
|
||||||
|
if (word.length() > 1)
|
||||||
|
features.add(word);
|
||||||
|
start = end;
|
||||||
}
|
}
|
||||||
|
|
||||||
Collection<Classification<String, String>> classifications = classifier.classifyDetailed(features);
|
Collection<Classification<String, String>> classifications = classifier.classifyDetailed(features);
|
||||||
|
|
Loading…
Reference in New Issue