Classifier: scale word frequency using max messages

This commit is contained in:
M66B 2021-01-07 10:06:33 +01:00
parent 7cf2b6aaec
commit 5cce1c4ad6
1 changed file with 13 additions and 8 deletions

View File

@@ -215,15 +215,20 @@ public class MessageClassifier {
if (state.maxMatchedWords < MIN_MATCHED_WORDS)
return null;
int maxMessages = 0;
for (String clazz : state.classStats.keySet()) {
Integer messages = classMessages.get(account).get(clazz);
if (messages != null && messages > maxMessages)
maxMessages = messages;
}
if (maxMessages == 0) {
Log.e("Classifier no messages account=" + account);
}
DB db = DB.getInstance(context);
List<Chance> chances = new ArrayList<>();
for (String clazz : state.classStats.keySet()) {
Integer messages = classMessages.get(account).get(clazz);
if (messages == null || messages == 0) {
Log.w("Classifier no messages class=" + account + ":" + clazz);
continue;
}
EntityFolder folder = db.folder().getFolderByName(account, clazz);
if (folder == null) {
Log.w("Classifier no folder class=" + account + ":" + clazz);
@@ -231,10 +236,10 @@ public class MessageClassifier {
}
Stat stat = state.classStats.get(clazz);
double chance = stat.totalFrequency / messages / state.maxMatchedWords;
double chance = stat.totalFrequency / maxMessages / state.maxMatchedWords;
Chance c = new Chance(clazz, chance);
EntityLog.log(context, "Classifier " + c +
" frequency=" + stat.totalFrequency + "/" + messages +
" frequency=" + stat.totalFrequency + "/" + maxMessages +
" matched=" + stat.matchedWords + "/" + state.maxMatchedWords +
" words=" + TextUtils.join(", ", stat.words));
chances.add(c);