2021-01-02 13:33:53 +00:00
|
|
|
package eu.faircode.email;
|
|
|
|
|
|
|
|
/*
|
|
|
|
This file is part of FairEmail.
|
|
|
|
|
|
|
|
FairEmail is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
FairEmail is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with FairEmail. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
Copyright 2018-2021 by Marcel Bokhorst (M66B)
|
|
|
|
*/
|
|
|
|
|
|
|
|
import android.content.Context;
|
2021-01-02 14:57:24 +00:00
|
|
|
import android.content.SharedPreferences;
|
2021-01-04 14:28:45 +00:00
|
|
|
import android.text.TextUtils;
|
2021-01-02 13:33:53 +00:00
|
|
|
|
2021-01-02 14:57:24 +00:00
|
|
|
import androidx.preference.PreferenceManager;
|
|
|
|
|
2021-01-02 13:33:53 +00:00
|
|
|
import org.jetbrains.annotations.NotNull;
|
2021-01-02 19:42:35 +00:00
|
|
|
import org.json.JSONArray;
|
|
|
|
import org.json.JSONException;
|
|
|
|
import org.json.JSONObject;
|
2021-01-02 13:33:53 +00:00
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.text.BreakIterator;
|
|
|
|
import java.util.ArrayList;
|
2021-01-04 14:28:45 +00:00
|
|
|
import java.util.Arrays;
|
2021-01-02 13:33:53 +00:00
|
|
|
import java.util.Collections;
|
|
|
|
import java.util.Comparator;
|
2021-01-03 07:41:29 +00:00
|
|
|
import java.util.Date;
|
2021-01-02 13:33:53 +00:00
|
|
|
import java.util.HashMap;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Map;
|
|
|
|
|
2021-01-04 14:28:45 +00:00
|
|
|
import javax.mail.Address;
|
|
|
|
import javax.mail.internet.InternetAddress;
|
|
|
|
|
2021-01-02 13:33:53 +00:00
|
|
|
public class MessageClassifier {
|
2021-01-02 19:42:35 +00:00
|
|
|
// Set by load() after it has run to completion; while set, load() returns without touching the data
private static boolean loaded = false;
|
2021-01-02 21:52:41 +00:00
|
|
|
// Set when the in-memory data was changed (classify, clear); reset by save() after writing the file
private static boolean dirty = false;
|
2021-01-03 11:23:51 +00:00
|
|
|
// account id -> (class name -> number of messages counted for that class)
private static final Map<Long, Map<String, Integer>> classMessages = new HashMap<>();
|
|
|
|
// account id -> (word -> (class name -> frequency of the word in that class))
private static final Map<Long, Map<String, Map<String, Integer>>> wordClassFrequency = new HashMap<>();
|
2021-01-02 13:33:53 +00:00
|
|
|
|
2021-01-03 09:42:59 +00:00
|
|
|
// A class is only considered as a candidate when at least this many distinct words matched it
private static final int MIN_MATCHED_WORDS = 10;
|
2021-01-02 13:33:53 +00:00
|
|
|
// The best class wins only when its chance is at least this many times the chance of the runner-up
private static final double CHANCE_THRESHOLD = 2.0;
|
|
|
|
|
2021-01-05 18:51:22 +00:00
|
|
|
static void classify(EntityMessage message, EntityFolder folder, EntityFolder target, Context context) {
|
2021-01-02 19:42:35 +00:00
|
|
|
try {
|
2021-01-03 11:23:51 +00:00
|
|
|
if (!isEnabled(context))
|
|
|
|
return;
|
2021-01-03 07:27:27 +00:00
|
|
|
|
2021-01-05 18:51:22 +00:00
|
|
|
if (!canClassify(folder.type))
|
2021-01-03 11:23:51 +00:00
|
|
|
return;
|
2021-01-02 13:33:53 +00:00
|
|
|
|
2021-01-05 18:51:22 +00:00
|
|
|
if (target != null && !canClassify(target.type))
|
2021-01-03 11:23:51 +00:00
|
|
|
return;
|
2021-01-02 19:50:43 +00:00
|
|
|
|
2021-01-03 11:23:51 +00:00
|
|
|
File file = message.getFile(context);
|
|
|
|
if (!file.exists())
|
|
|
|
return;
|
2021-01-02 13:33:53 +00:00
|
|
|
|
2021-01-06 07:31:34 +00:00
|
|
|
long start = new Date().getTime();
|
|
|
|
|
2021-01-05 18:51:22 +00:00
|
|
|
// Build text to classify
|
2021-01-04 13:37:54 +00:00
|
|
|
StringBuilder sb = new StringBuilder();
|
2021-01-04 14:28:45 +00:00
|
|
|
|
|
|
|
List<Address> addresses = new ArrayList<>();
|
|
|
|
if (message.from != null)
|
|
|
|
addresses.addAll(Arrays.asList(message.from));
|
|
|
|
if (message.to != null)
|
|
|
|
addresses.addAll(Arrays.asList(message.to));
|
|
|
|
if (message.cc != null)
|
|
|
|
addresses.addAll(Arrays.asList(message.cc));
|
|
|
|
if (message.bcc != null)
|
|
|
|
addresses.addAll(Arrays.asList(message.bcc));
|
|
|
|
if (message.reply != null)
|
|
|
|
addresses.addAll(Arrays.asList(message.reply));
|
|
|
|
|
|
|
|
for (Address address : addresses) {
|
|
|
|
String email = ((InternetAddress) address).getAddress();
|
|
|
|
String name = ((InternetAddress) address).getAddress();
|
|
|
|
if (!TextUtils.isEmpty(email)) {
|
|
|
|
sb.append(email).append('\n');
|
|
|
|
int at = email.indexOf('@');
|
|
|
|
String domain = (at < 0 ? null : email.substring(at + 1));
|
|
|
|
if (!TextUtils.isEmpty(domain))
|
|
|
|
sb.append(domain).append('\n');
|
|
|
|
}
|
|
|
|
if (!TextUtils.isEmpty(name))
|
|
|
|
sb.append(name).append('\n');
|
|
|
|
}
|
|
|
|
|
2021-01-04 13:37:54 +00:00
|
|
|
if (message.subject != null)
|
|
|
|
sb.append(message.subject).append('\n');
|
2021-01-04 14:28:45 +00:00
|
|
|
|
2021-01-04 13:37:54 +00:00
|
|
|
sb.append(HtmlHelper.getFullText(file));
|
2021-01-04 14:28:45 +00:00
|
|
|
|
2021-01-04 13:37:54 +00:00
|
|
|
if (sb.length() == 0)
|
2021-01-03 11:23:51 +00:00
|
|
|
return;
|
2021-01-02 13:33:53 +00:00
|
|
|
|
2021-01-05 18:51:22 +00:00
|
|
|
// Load data if needed
|
2021-01-03 11:23:51 +00:00
|
|
|
load(context);
|
2021-01-02 21:52:41 +00:00
|
|
|
|
2021-01-05 18:51:22 +00:00
|
|
|
// Initialize data if needed
|
|
|
|
if (!classMessages.containsKey(folder.account))
|
|
|
|
classMessages.put(folder.account, new HashMap<>());
|
|
|
|
if (!wordClassFrequency.containsKey(folder.account))
|
|
|
|
wordClassFrequency.put(folder.account, new HashMap<>());
|
2021-01-03 11:23:51 +00:00
|
|
|
|
2021-01-05 18:51:22 +00:00
|
|
|
// Classify text
|
|
|
|
String classified = classify(folder.account, folder.name, sb.toString(), target == null, context);
|
2021-01-03 11:23:51 +00:00
|
|
|
|
2021-01-06 07:31:34 +00:00
|
|
|
long elapsed = new Date().getTime() - start;
|
2021-01-03 11:23:51 +00:00
|
|
|
EntityLog.log(context, "Classifier" +
|
|
|
|
" folder=" + folder.name +
|
|
|
|
" message=" + message.id +
|
|
|
|
"@" + new Date(message.received) +
|
|
|
|
":" + message.subject +
|
2021-01-04 11:21:51 +00:00
|
|
|
" class=" + classified +
|
2021-01-06 07:31:34 +00:00
|
|
|
" re=" + message.auto_classified +
|
|
|
|
" elapsed=" + elapsed);
|
2021-01-03 11:23:51 +00:00
|
|
|
|
2021-01-05 18:51:22 +00:00
|
|
|
// Update message count
|
|
|
|
Integer m = classMessages.get(folder.account).get(folder.name);
|
|
|
|
if (target == null) {
|
2021-01-03 11:23:51 +00:00
|
|
|
m = (m == null ? 1 : m + 1);
|
2021-01-05 18:51:22 +00:00
|
|
|
classMessages.get(folder.account).put(folder.name, m);
|
2021-01-03 11:23:51 +00:00
|
|
|
} else {
|
|
|
|
if (m != null && m > 0)
|
2021-01-05 18:51:22 +00:00
|
|
|
classMessages.get(folder.account).put(folder.name, m - 1);
|
2021-01-03 11:23:51 +00:00
|
|
|
}
|
2021-01-05 18:51:22 +00:00
|
|
|
EntityLog.log(context, "Classifier classify=" + folder.name +
|
|
|
|
" messages=" + classMessages.get(folder.account).get(folder.name));
|
2021-01-03 11:23:51 +00:00
|
|
|
|
|
|
|
dirty = true;
|
|
|
|
|
2021-01-05 18:51:22 +00:00
|
|
|
// Auto classify
|
|
|
|
if (classified != null &&
|
|
|
|
!classified.equals(folder.name) &&
|
|
|
|
!message.auto_classified &&
|
|
|
|
!EntityFolder.JUNK.equals(folder.type)) {
|
|
|
|
DB db = DB.getInstance(context);
|
2021-01-04 11:21:51 +00:00
|
|
|
try {
|
|
|
|
db.beginTransaction();
|
|
|
|
|
2021-01-05 18:51:22 +00:00
|
|
|
EntityFolder dest = db.folder().getFolderByName(folder.account, classified);
|
|
|
|
if (dest != null && dest.auto_classify) {
|
|
|
|
EntityOperation.queue(context, message, EntityOperation.MOVE, dest.id, false, true);
|
2021-01-04 08:27:27 +00:00
|
|
|
message.ui_hide = true;
|
|
|
|
}
|
2021-01-04 11:21:51 +00:00
|
|
|
|
|
|
|
db.setTransactionSuccessful();
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
db.endTransaction();
|
|
|
|
}
|
2021-01-05 18:51:22 +00:00
|
|
|
}
|
2021-01-03 11:23:51 +00:00
|
|
|
} catch (Throwable ex) {
|
|
|
|
Log.e(ex);
|
2021-01-02 21:38:37 +00:00
|
|
|
}
|
2021-01-02 13:33:53 +00:00
|
|
|
}
|
|
|
|
|
2021-01-06 10:39:41 +00:00
|
|
|
private static String classify(long account, String currentClass, String text, boolean added, Context context) {
|
2021-01-02 13:33:53 +00:00
|
|
|
int maxMatchedWords = 0;
|
|
|
|
List<String> words = new ArrayList<>();
|
|
|
|
Map<String, Stat> classStats = new HashMap<>();
|
|
|
|
|
|
|
|
BreakIterator boundary = BreakIterator.getWordInstance(); // TODO ICU
|
|
|
|
boundary.setText(text);
|
|
|
|
int start = boundary.first();
|
|
|
|
for (int end = boundary.next(); end != BreakIterator.DONE; end = boundary.next()) {
|
|
|
|
String word = text.substring(start, end).toLowerCase();
|
|
|
|
if (word.length() > 1 &&
|
|
|
|
!words.contains(word) &&
|
|
|
|
!word.matches(".*\\d.*")) {
|
|
|
|
words.add(word);
|
|
|
|
|
2021-01-02 20:39:09 +00:00
|
|
|
Map<String, Integer> classFrequency = wordClassFrequency.get(account).get(word);
|
2021-01-02 17:18:27 +00:00
|
|
|
if (added) {
|
|
|
|
if (classFrequency == null) {
|
|
|
|
classFrequency = new HashMap<>();
|
2021-01-02 20:39:09 +00:00
|
|
|
wordClassFrequency.get(account).put(word, classFrequency);
|
2021-01-02 17:18:27 +00:00
|
|
|
}
|
2021-01-02 13:33:53 +00:00
|
|
|
|
2021-01-05 12:26:46 +00:00
|
|
|
for (String clazz : classFrequency.keySet()) {
|
2021-01-02 17:18:27 +00:00
|
|
|
int frequency = classFrequency.get(clazz);
|
|
|
|
|
|
|
|
Stat stat = classStats.get(clazz);
|
|
|
|
if (stat == null) {
|
|
|
|
stat = new Stat();
|
|
|
|
classStats.put(clazz, stat);
|
2021-01-02 13:33:53 +00:00
|
|
|
}
|
|
|
|
|
2021-01-02 17:18:27 +00:00
|
|
|
stat.matchedWords++;
|
|
|
|
stat.totalFrequency += frequency;
|
2021-01-06 17:23:58 +00:00
|
|
|
if (BuildConfig.DEBUG)
|
|
|
|
stat.words.add(word);
|
2021-01-02 13:33:53 +00:00
|
|
|
|
2021-01-02 17:18:27 +00:00
|
|
|
if (stat.matchedWords > maxMatchedWords)
|
|
|
|
maxMatchedWords = stat.matchedWords;
|
2021-01-02 13:33:53 +00:00
|
|
|
}
|
|
|
|
|
2021-01-06 10:39:41 +00:00
|
|
|
Integer c = classFrequency.get(currentClass);
|
2021-01-02 17:18:27 +00:00
|
|
|
c = (c == null ? 1 : c + 1);
|
2021-01-06 10:39:41 +00:00
|
|
|
classFrequency.put(currentClass, c);
|
2021-01-02 17:18:27 +00:00
|
|
|
} else {
|
2021-01-06 10:39:41 +00:00
|
|
|
Integer c = (classFrequency == null ? null : classFrequency.get(currentClass));
|
2021-01-02 17:18:27 +00:00
|
|
|
if (c != null)
|
|
|
|
if (c > 0)
|
2021-01-06 10:39:41 +00:00
|
|
|
classFrequency.put(currentClass, c - 1);
|
2021-01-02 17:18:27 +00:00
|
|
|
else
|
2021-01-06 10:39:41 +00:00
|
|
|
classFrequency.remove(currentClass);
|
2021-01-02 13:33:53 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
start = end;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!added)
|
|
|
|
return null;
|
|
|
|
|
2021-01-05 11:55:12 +00:00
|
|
|
if (maxMatchedWords == 0)
|
|
|
|
return null;
|
|
|
|
|
2021-01-05 12:18:17 +00:00
|
|
|
DB db = DB.getInstance(context);
|
2021-01-02 13:33:53 +00:00
|
|
|
List<Chance> chances = new ArrayList<>();
|
|
|
|
for (String clazz : classStats.keySet()) {
|
2021-01-05 11:55:12 +00:00
|
|
|
Integer messages = classMessages.get(account).get(clazz);
|
2021-01-05 18:31:35 +00:00
|
|
|
if (messages == null || messages == 0) {
|
|
|
|
Log.w("Classifier no messages class=" + account + ":" + clazz);
|
2021-01-05 07:45:14 +00:00
|
|
|
continue;
|
2021-01-05 18:31:35 +00:00
|
|
|
}
|
2021-01-05 07:45:14 +00:00
|
|
|
|
2021-01-05 12:18:17 +00:00
|
|
|
EntityFolder folder = db.folder().getFolderByName(account, clazz);
|
2021-01-05 18:31:35 +00:00
|
|
|
if (folder == null) {
|
|
|
|
Log.w("Classifier no folder class=" + account + ":" + clazz);
|
2021-01-05 12:18:17 +00:00
|
|
|
continue;
|
2021-01-05 18:31:35 +00:00
|
|
|
}
|
2021-01-05 12:18:17 +00:00
|
|
|
|
2021-01-02 13:33:53 +00:00
|
|
|
Stat stat = classStats.get(clazz);
|
2021-01-06 17:23:58 +00:00
|
|
|
boolean consider = (stat.matchedWords >= MIN_MATCHED_WORDS);
|
2021-01-05 07:39:26 +00:00
|
|
|
double chance = (double) stat.totalFrequency / messages / maxMatchedWords;
|
2021-01-02 13:33:53 +00:00
|
|
|
Chance c = new Chance(clazz, chance);
|
2021-01-03 07:41:29 +00:00
|
|
|
EntityLog.log(context, "Classifier " + c +
|
2021-01-05 07:39:26 +00:00
|
|
|
" frequency=" + stat.totalFrequency + "/" + messages +
|
2021-01-06 17:23:58 +00:00
|
|
|
" matched=" + stat.matchedWords + "/" + maxMatchedWords +
|
|
|
|
" consider=" + consider +
|
|
|
|
" words=" + TextUtils.join(", ", stat.words));
|
|
|
|
if (consider)
|
2021-01-06 16:52:08 +00:00
|
|
|
chances.add(c);
|
2021-01-02 13:33:53 +00:00
|
|
|
}
|
|
|
|
|
2021-01-04 14:28:45 +00:00
|
|
|
if (BuildConfig.DEBUG)
|
|
|
|
Log.i("Classifier words=" + TextUtils.join(", ", words));
|
|
|
|
|
2021-01-05 11:55:12 +00:00
|
|
|
if (chances.size() <= 1)
|
2021-01-02 13:33:53 +00:00
|
|
|
return null;
|
|
|
|
|
|
|
|
Collections.sort(chances, new Comparator<Chance>() {
|
|
|
|
@Override
|
|
|
|
public int compare(Chance c1, Chance c2) {
|
|
|
|
return -c1.chance.compareTo(c2.chance);
|
|
|
|
}
|
|
|
|
});
|
|
|
|
|
|
|
|
String classification = null;
|
2021-01-05 11:48:38 +00:00
|
|
|
if (chances.get(0).chance / chances.get(1).chance >= CHANCE_THRESHOLD)
|
2021-01-02 13:33:53 +00:00
|
|
|
classification = chances.get(0).clazz;
|
|
|
|
|
2021-01-06 10:39:41 +00:00
|
|
|
Log.i("Classifier current=" + currentClass + " classified=" + classification);
|
2021-01-02 13:33:53 +00:00
|
|
|
|
|
|
|
return classification;
|
|
|
|
}
|
|
|
|
|
2021-01-02 19:42:35 +00:00
|
|
|
static synchronized void save(Context context) throws JSONException, IOException {
|
2021-01-02 21:52:41 +00:00
|
|
|
if (!dirty)
|
2021-01-02 21:39:30 +00:00
|
|
|
return;
|
2021-01-02 19:42:35 +00:00
|
|
|
|
|
|
|
File file = getFile(context);
|
2021-01-04 14:35:57 +00:00
|
|
|
Helper.writeText(file, toJson().toString(2));
|
2021-01-02 19:42:35 +00:00
|
|
|
|
2021-01-03 09:06:08 +00:00
|
|
|
dirty = false;
|
2021-01-04 14:35:57 +00:00
|
|
|
Log.i("Classifier data saved");
|
2021-01-02 19:42:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private static synchronized void load(Context context) throws IOException, JSONException {
|
2021-01-05 12:42:43 +00:00
|
|
|
if (loaded || dirty)
|
2021-01-02 19:42:35 +00:00
|
|
|
return;
|
|
|
|
|
|
|
|
classMessages.clear();
|
|
|
|
wordClassFrequency.clear();
|
|
|
|
|
|
|
|
File file = getFile(context);
|
|
|
|
if (file.exists()) {
|
|
|
|
String json = Helper.readText(file);
|
2021-01-04 14:35:57 +00:00
|
|
|
fromJson(new JSONObject(json));
|
2021-01-02 19:42:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
loaded = true;
|
2021-01-04 14:35:57 +00:00
|
|
|
Log.i("Classifier data loaded");
|
2021-01-02 19:42:35 +00:00
|
|
|
}
|
|
|
|
|
2021-01-03 19:21:35 +00:00
|
|
|
static synchronized void clear(Context context) {
|
|
|
|
classMessages.clear();
|
|
|
|
wordClassFrequency.clear();
|
|
|
|
dirty = true;
|
2021-01-05 12:42:43 +00:00
|
|
|
Log.i("Classifier data cleared");
|
2021-01-03 19:21:35 +00:00
|
|
|
}
|
|
|
|
|
2021-01-02 21:38:37 +00:00
|
|
|
static boolean isEnabled(Context context) {
|
2021-01-02 19:42:35 +00:00
|
|
|
SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(context);
|
2021-01-03 19:21:35 +00:00
|
|
|
return prefs.getBoolean("classification", false);
|
2021-01-02 19:42:35 +00:00
|
|
|
}
|
|
|
|
|
2021-01-03 07:27:27 +00:00
|
|
|
static boolean canClassify(String folderType) {
|
|
|
|
return EntityFolder.INBOX.equals(folderType) ||
|
|
|
|
EntityFolder.JUNK.equals(folderType) ||
|
|
|
|
EntityFolder.USER.equals(folderType);
|
|
|
|
}
|
2021-01-02 19:42:35 +00:00
|
|
|
|
2021-01-03 09:58:51 +00:00
|
|
|
static File getFile(Context context) {
|
2021-01-02 19:42:35 +00:00
|
|
|
return new File(context.getFilesDir(), "classifier.json");
|
|
|
|
}
|
|
|
|
|
2021-01-04 14:35:57 +00:00
|
|
|
static JSONObject toJson() throws JSONException {
|
|
|
|
JSONArray jmessages = new JSONArray();
|
|
|
|
for (Long account : classMessages.keySet())
|
|
|
|
for (String clazz : classMessages.get(account).keySet()) {
|
|
|
|
JSONObject jmessage = new JSONObject();
|
|
|
|
jmessage.put("account", account);
|
|
|
|
jmessage.put("class", clazz);
|
|
|
|
jmessage.put("count", classMessages.get(account).get(clazz));
|
|
|
|
jmessages.put(jmessage);
|
|
|
|
}
|
|
|
|
|
|
|
|
JSONArray jwords = new JSONArray();
|
|
|
|
for (Long account : classMessages.keySet())
|
|
|
|
for (String word : wordClassFrequency.get(account).keySet()) {
|
|
|
|
Map<String, Integer> classFrequency = wordClassFrequency.get(account).get(word);
|
|
|
|
for (String clazz : classFrequency.keySet()) {
|
|
|
|
JSONObject jword = new JSONObject();
|
|
|
|
jword.put("account", account);
|
|
|
|
jword.put("word", word);
|
|
|
|
jword.put("class", clazz);
|
|
|
|
jword.put("frequency", classFrequency.get(clazz));
|
|
|
|
jwords.put(jword);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
JSONObject jroot = new JSONObject();
|
|
|
|
jroot.put("messages", jmessages);
|
|
|
|
jroot.put("words", jwords);
|
|
|
|
|
|
|
|
return jroot;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void fromJson(JSONObject jroot) throws JSONException {
|
|
|
|
JSONArray jmessages = jroot.getJSONArray("messages");
|
|
|
|
for (int m = 0; m < jmessages.length(); m++) {
|
|
|
|
JSONObject jmessage = (JSONObject) jmessages.get(m);
|
|
|
|
long account = jmessage.getLong("account");
|
|
|
|
if (!classMessages.containsKey(account))
|
|
|
|
classMessages.put(account, new HashMap<>());
|
|
|
|
classMessages.get(account).put(jmessage.getString("class"), jmessage.getInt("count"));
|
|
|
|
}
|
|
|
|
|
|
|
|
JSONArray jwords = jroot.getJSONArray("words");
|
|
|
|
for (int w = 0; w < jwords.length(); w++) {
|
|
|
|
JSONObject jword = (JSONObject) jwords.get(w);
|
|
|
|
long account = jword.getLong("account");
|
|
|
|
if (!wordClassFrequency.containsKey(account))
|
|
|
|
wordClassFrequency.put(account, new HashMap<>());
|
|
|
|
String word = jword.getString("word");
|
|
|
|
Map<String, Integer> classFrequency = wordClassFrequency.get(account).get(word);
|
|
|
|
if (classFrequency == null) {
|
|
|
|
classFrequency = new HashMap<>();
|
|
|
|
wordClassFrequency.get(account).put(word, classFrequency);
|
|
|
|
}
|
|
|
|
classFrequency.put(jword.getString("class"), jword.getInt("frequency"));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-02 13:33:53 +00:00
|
|
|
private static class Stat {
|
|
|
|
int matchedWords = 0;
|
|
|
|
int totalFrequency = 0;
|
2021-01-06 17:23:58 +00:00
|
|
|
List<String> words = new ArrayList<>();
|
2021-01-02 13:33:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private static class Chance {
|
|
|
|
String clazz;
|
|
|
|
Double chance;
|
|
|
|
|
|
|
|
Chance(String clazz, Double chance) {
|
|
|
|
this.clazz = clazz;
|
|
|
|
this.chance = chance;
|
|
|
|
}
|
|
|
|
|
|
|
|
@NotNull
|
|
|
|
@Override
|
|
|
|
public String toString() {
|
|
|
|
return clazz + "=" + chance;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|