Use break iterator

This commit is contained in:
M66B 2020-12-29 09:45:50 +01:00
parent aa279e5b57
commit 385a829cba
1 changed file with 10 additions and 6 deletions

View File

@@ -34,6 +34,7 @@ import androidx.work.WorkerParameters;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.text.BreakIterator;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
@@ -89,12 +90,15 @@ public class WorkerFts extends Worker {
EntityFolder folder = db.folder().getFolder(message.folder); EntityFolder folder = db.folder().getFolder(message.folder);
if (folder != null) { if (folder != null) {
List<String> features = new ArrayList<>(); List<String> features = new ArrayList<>();
for (String word : text.trim().toLowerCase().split("\\W+")) {
if (word.matches(".*\\d.*")) BreakIterator boundary = BreakIterator.getWordInstance();
continue; boundary.setText(text);
if (word.endsWith(".")) int start = boundary.first();
word = word.substring(0, word.length() - 1); for (int end = boundary.next(); end != BreakIterator.DONE; end = boundary.next()) {
features.add(word); String word = text.substring(start, end);
if (word.length() > 1)
features.add(word);
start = end;
} }
Collection<Classification<String, String>> classifications = classifier.classifyDetailed(features); Collection<Classification<String, String>> classifications = classifier.classifyDetailed(features);