From 5cce1c4ad63bc6430ddd52a721649fb1423bab5a Mon Sep 17 00:00:00 2001 From: M66B Date: Thu, 7 Jan 2021 10:06:33 +0100 Subject: [PATCH] Classifier: scale word frequency using max messages --- .../eu/faircode/email/MessageClassifier.java | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/app/src/main/java/eu/faircode/email/MessageClassifier.java b/app/src/main/java/eu/faircode/email/MessageClassifier.java index 41daf34505..990595b3bf 100644 --- a/app/src/main/java/eu/faircode/email/MessageClassifier.java +++ b/app/src/main/java/eu/faircode/email/MessageClassifier.java @@ -215,15 +215,20 @@ public class MessageClassifier { if (state.maxMatchedWords < MIN_MATCHED_WORDS) return null; + int maxMessages = 0; + for (String clazz : state.classStats.keySet()) { + Integer messages = classMessages.get(account).get(clazz); + if (messages != null && messages > maxMessages) + maxMessages = messages; + } + + if (maxMessages == 0) { + Log.e("Classifier no messages account=" + account); + } + DB db = DB.getInstance(context); List chances = new ArrayList<>(); for (String clazz : state.classStats.keySet()) { - Integer messages = classMessages.get(account).get(clazz); - if (messages == null || messages == 0) { - Log.w("Classifier no messages class=" + account + ":" + clazz); - continue; - } - EntityFolder folder = db.folder().getFolderByName(account, clazz); if (folder == null) { Log.w("Classifier no folder class=" + account + ":" + clazz); @@ -231,10 +236,10 @@ public class MessageClassifier { } Stat stat = state.classStats.get(clazz); - double chance = stat.totalFrequency / messages / state.maxMatchedWords; + double chance = stat.totalFrequency / maxMessages / state.maxMatchedWords; Chance c = new Chance(clazz, chance); EntityLog.log(context, "Classifier " + c + - " frequency=" + stat.totalFrequency + "/" + messages + + " frequency=" + stat.totalFrequency + "/" + maxMessages + " matched=" + stat.matchedWords + "/" + state.maxMatchedWords + " words=" + TextUtils.join(", ", stat.words)); chances.add(c);