FairEmail/app/src/main/java/eu/faircode/email/MessageClassifier.java

634 lines
23 KiB
Java
Raw Normal View History

2021-01-02 13:33:53 +00:00
package eu.faircode.email;
/*
This file is part of FairEmail.
FairEmail is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
FairEmail is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with FairEmail. If not, see <http://www.gnu.org/licenses/>.
Copyright 2018-2021 by Marcel Bokhorst (M66B)
*/
import android.content.Context;
2021-01-02 14:57:24 +00:00
import android.content.SharedPreferences;
import android.os.Build;
import android.text.TextUtils;
2021-01-02 13:33:53 +00:00
2021-01-10 13:31:29 +00:00
import androidx.annotation.NonNull;
2021-01-02 14:57:24 +00:00
import androidx.preference.PreferenceManager;
2021-01-02 13:33:53 +00:00
import org.jetbrains.annotations.NotNull;
2021-01-02 19:42:35 +00:00
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
2021-01-02 13:33:53 +00:00
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
2021-01-02 13:33:53 +00:00
import java.util.Collections;
import java.util.Comparator;
2021-01-03 07:41:29 +00:00
import java.util.Date;
2021-01-02 13:33:53 +00:00
import java.util.HashMap;
import java.util.Iterator;
2021-01-02 13:33:53 +00:00
import java.util.List;
import java.util.Map;
import javax.mail.Address;
import javax.mail.internet.InternetAddress;
2021-01-02 13:33:53 +00:00
public class MessageClassifier {
2021-01-02 19:42:35 +00:00
// Set by load() once the persisted data has been read; load() returns immediately while this is true
private static boolean loaded = false;
2021-01-02 21:52:41 +00:00
// Set when the in-memory data was changed; save() only writes the file while this is true
private static boolean dirty = false;
2021-01-10 16:02:27 +00:00
// Account id -> message ids (msgid) of messages for which an automatic move was queued
private static final Map<Long, List<String>> accountMsgIds = new HashMap<>();
2021-01-07 18:20:17 +00:00
// Account id -> class (folder name) -> number of messages counted for that class
private static final Map<Long, Map<String, Integer>> classMessages = new HashMap<>();
2021-01-06 20:35:11 +00:00
// Account id -> word -> class (folder name) -> frequency data of the word in that class
private static final Map<Long, Map<String, Map<String, Frequency>>> wordClassFrequency = new HashMap<>();
2021-01-02 13:33:53 +00:00
2021-01-05 18:51:22 +00:00
static void classify(EntityMessage message, EntityFolder folder, EntityFolder target, Context context) {
2021-01-02 19:42:35 +00:00
try {
2021-01-03 11:23:51 +00:00
if (!isEnabled(context))
return;
2021-01-03 07:27:27 +00:00
2021-01-05 18:51:22 +00:00
if (!canClassify(folder.type))
2021-01-03 11:23:51 +00:00
return;
2021-01-02 13:33:53 +00:00
2021-01-05 18:51:22 +00:00
if (target != null && !canClassify(target.type))
2021-01-03 11:23:51 +00:00
return;
2021-01-02 19:50:43 +00:00
2021-01-06 07:31:34 +00:00
long start = new Date().getTime();
2021-01-05 18:51:22 +00:00
// Build text to classify
2021-01-10 13:31:29 +00:00
List<String> texts = getTexts(message, context);
if (texts.size() == 0)
2021-01-03 11:23:51 +00:00
return;
2021-01-02 13:33:53 +00:00
2021-01-05 18:51:22 +00:00
// Load data if needed
2021-01-03 11:23:51 +00:00
load(context);
2021-01-02 21:52:41 +00:00
2021-01-10 16:02:27 +00:00
// Initialize account if needed
if (!accountMsgIds.containsKey(folder.account))
accountMsgIds.put(folder.account, new ArrayList<>());
if (!classMessages.containsKey(folder.account))
classMessages.put(folder.account, new HashMap<>());
if (!wordClassFrequency.containsKey(folder.account))
wordClassFrequency.put(folder.account, new HashMap<>());
2021-01-10 13:31:29 +00:00
// Classify texts
String classified = classify(folder.account, folder.name, texts, target == null, context);
2021-01-03 11:23:51 +00:00
2021-01-06 07:31:34 +00:00
long elapsed = new Date().getTime() - start;
2021-01-03 11:23:51 +00:00
EntityLog.log(context, "Classifier" +
" folder=" + folder.name +
" message=" + message.id +
"@" + new Date(message.received) +
":" + message.subject +
2021-01-04 11:21:51 +00:00
" class=" + classified +
2021-01-06 07:31:34 +00:00
" re=" + message.auto_classified +
" elapsed=" + elapsed);
2021-01-03 11:23:51 +00:00
2021-01-10 13:31:29 +00:00
// Auto classify message
2021-01-05 18:51:22 +00:00
if (classified != null &&
!classified.equals(folder.name) &&
2021-01-10 16:02:27 +00:00
!TextUtils.isEmpty(message.msgid) &&
!accountMsgIds.get(folder.account).contains(message.msgid) &&
2021-01-05 18:51:22 +00:00
!EntityFolder.JUNK.equals(folder.type)) {
2021-01-10 17:43:23 +00:00
boolean pro = ActivityBilling.isPro(context);
2021-01-05 18:51:22 +00:00
DB db = DB.getInstance(context);
2021-01-04 11:21:51 +00:00
try {
db.beginTransaction();
2021-01-05 18:51:22 +00:00
EntityFolder dest = db.folder().getFolderByName(folder.account, classified);
2021-01-10 17:43:23 +00:00
if (dest != null && dest.auto_classify &&
(pro || EntityFolder.JUNK.equals(dest.type))) {
2021-01-05 18:51:22 +00:00
EntityOperation.queue(context, message, EntityOperation.MOVE, dest.id, false, true);
2021-01-04 08:27:27 +00:00
message.ui_hide = true;
}
2021-01-04 11:21:51 +00:00
db.setTransactionSuccessful();
} finally {
db.endTransaction();
}
2021-01-10 17:47:29 +00:00
if (message.ui_hide)
accountMsgIds.get(folder.account).add(message.msgid);
2021-01-05 18:51:22 +00:00
}
2021-01-10 16:02:27 +00:00
dirty = true;
2021-01-03 11:23:51 +00:00
} catch (Throwable ex) {
Log.e(ex);
2021-01-02 21:38:37 +00:00
}
2021-01-02 13:33:53 +00:00
}
2021-01-10 13:31:29 +00:00
@NonNull
private static List<String> getTexts(@NonNull EntityMessage message, @NonNull Context context) throws IOException {
List<String> texts = new ArrayList<>();
File file = message.getFile(context);
if (!file.exists())
return texts;
List<Address> addresses = new ArrayList<>();
if (message.from != null)
addresses.addAll(Arrays.asList(message.from));
if (message.to != null)
addresses.addAll(Arrays.asList(message.to));
if (message.cc != null)
addresses.addAll(Arrays.asList(message.cc));
if (message.bcc != null)
addresses.addAll(Arrays.asList(message.bcc));
if (message.reply != null)
addresses.addAll(Arrays.asList(message.reply));
for (Address address : addresses) {
String email = ((InternetAddress) address).getAddress();
String name = ((InternetAddress) address).getPersonal();
if (!TextUtils.isEmpty(email))
texts.add(email);
if (!TextUtils.isEmpty(name))
texts.add(name);
2021-01-07 09:58:56 +00:00
}
2021-01-10 13:31:29 +00:00
if (message.subject != null)
texts.add(message.subject);
String text = HtmlHelper.getFullText(file);
texts.add(text);
return texts;
}
private static String classify(long account, @NonNull String currentClass, @NonNull List<String> texts, boolean added, @NonNull Context context) {
2021-01-06 20:35:11 +00:00
State state = new State();
2021-01-07 14:20:39 +00:00
2021-01-10 13:31:29 +00:00
Log.i("Classifier texts=" + texts.size());
for (String text : texts) {
// First word
processWord(account, added, null, state);
// Process words
if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) {
java.text.BreakIterator boundary = java.text.BreakIterator.getWordInstance();
boundary.setText(text);
int start = boundary.first();
for (int end = boundary.next(); end != java.text.BreakIterator.DONE; end = boundary.next()) {
String word = text.substring(start, end);
processWord(account, added, word, state);
start = end;
}
} else {
// The ICU break iterator works better for Chinese texts
android.icu.text.BreakIterator boundary = android.icu.text.BreakIterator.getWordInstance();
boundary.setText(text);
int start = boundary.first();
for (int end = boundary.next(); end != android.icu.text.BreakIterator.DONE; end = boundary.next()) {
String word = text.substring(start, end);
processWord(account, added, word, state);
start = end;
}
2021-01-02 13:33:53 +00:00
}
}
2021-01-10 13:31:29 +00:00
// final word
processWord(account, added, null, state);
int maxMessages = 0;
for (String clazz : classMessages.get(account).keySet()) {
int count = classMessages.get(account).get(clazz);
if (count > maxMessages)
maxMessages = count;
}
2021-01-10 14:26:51 +00:00
updateFrequencies(account, currentClass, added, state);
if (maxMessages == 0) {
2021-01-07 21:08:21 +00:00
Log.i("Classifier no messages account=" + account);
2021-01-07 14:20:39 +00:00
return null;
}
2021-01-10 14:26:51 +00:00
if (!added)
return null;
2021-01-07 14:20:39 +00:00
// Calculate chance per class
DB db = DB.getInstance(context);
2021-01-10 13:31:29 +00:00
int words = state.words.size() - texts.size() - 1;
List<Chance> chances = new ArrayList<>();
for (String clazz : state.classStats.keySet()) {
2021-01-05 12:18:17 +00:00
EntityFolder folder = db.folder().getFolderByName(account, clazz);
2021-01-05 18:31:35 +00:00
if (folder == null) {
Log.w("Classifier no folder class=" + account + ":" + clazz);
2021-01-05 12:18:17 +00:00
continue;
2021-01-05 18:31:35 +00:00
}
2021-01-05 12:18:17 +00:00
2021-01-06 20:35:11 +00:00
Stat stat = state.classStats.get(clazz);
2021-01-07 09:58:56 +00:00
2021-01-08 17:30:00 +00:00
double chance = stat.totalFrequency / maxMessages / words;
2021-01-02 13:33:53 +00:00
Chance c = new Chance(clazz, chance);
2021-01-07 08:35:58 +00:00
chances.add(c);
2021-01-07 09:58:56 +00:00
EntityLog.log(context, "Classifier " + c +
2021-01-07 13:55:14 +00:00
" frequency=" + (Math.round(stat.totalFrequency * 100.0) / 100.0) + "/" + maxMessages + " msgs" +
2021-01-08 17:30:00 +00:00
" matched=" + stat.matchedWords + "/" + words + " words" +
2021-01-07 09:58:56 +00:00
" text=" + TextUtils.join(", ", stat.words));
2021-01-02 13:33:53 +00:00
}
if (BuildConfig.DEBUG)
2021-01-10 13:31:29 +00:00
Log.i("Classifier words=" + state.words.size() + " " + TextUtils.join(", ", state.words));
2021-01-05 11:55:12 +00:00
if (chances.size() <= 1)
2021-01-02 13:33:53 +00:00
return null;
2021-01-07 14:20:39 +00:00
// Sort classes by chance
2021-01-02 13:33:53 +00:00
Collections.sort(chances, new Comparator<Chance>() {
@Override
public int compare(Chance c1, Chance c2) {
return -c1.chance.compareTo(c2.chance);
}
});
2021-01-08 13:21:51 +00:00
SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(context);
2021-01-10 18:23:23 +00:00
double class_min_chance = prefs.getInt("class_min_probability", 20) / 100.0;
2021-01-08 13:21:51 +00:00
double class_min_difference = prefs.getInt("class_min_difference", 50) / 100.0;
2021-01-07 14:20:39 +00:00
// Select best class
2021-01-02 13:33:53 +00:00
String classification = null;
2021-01-08 13:21:51 +00:00
double c0 = chances.get(0).chance;
double c1 = chances.get(1).chance;
2021-01-08 13:32:14 +00:00
double threshold = c0 * (1.0 - class_min_difference);
if (c0 > class_min_chance && c1 < threshold)
2021-01-02 13:33:53 +00:00
classification = chances.get(0).clazz;
2021-01-08 13:21:51 +00:00
Log.i("Classifier current=" + currentClass +
2021-01-08 13:32:14 +00:00
" c0=" + Math.round(c0 * 100 * 100) / 100.0 + ">" + Math.round(class_min_chance * 100) + "%" +
" c1=" + Math.round(c1 * 100 * 100) / 100.0 + "<" + Math.round(threshold * 100 * 100) / 100.0 + "%" +
" (" + Math.round(class_min_difference * 100) + "%)" +
2021-01-08 13:21:51 +00:00
" classified=" + classification);
2021-01-02 13:33:53 +00:00
return classification;
}
2021-01-10 13:31:29 +00:00
private static void processWord(long account, boolean added, String word, State state) {
2021-01-07 13:17:38 +00:00
if (word != null) {
word = word.trim().toLowerCase();
2021-01-10 13:31:29 +00:00
if (word.length() < 2 || word.matches(".*\\d.*"))
2021-01-07 13:17:38 +00:00
return;
}
2021-01-10 13:31:29 +00:00
if (word != null ||
state.words.size() == 0 ||
state.words.get(state.words.size() - 1) != null)
state.words.add(word);
if (!added)
return;
2021-01-07 13:17:38 +00:00
2021-01-06 20:35:11 +00:00
if (state.words.size() < 3)
return;
String before = state.words.get(state.words.size() - 3);
String current = state.words.get(state.words.size() - 2);
String after = state.words.get(state.words.size() - 1);
2021-01-10 13:31:29 +00:00
if (current == null)
return;
2021-01-06 20:35:11 +00:00
Map<String, Frequency> classFrequency = wordClassFrequency.get(account).get(current);
2021-01-10 13:31:29 +00:00
if (classFrequency == null)
return;
for (String clazz : classFrequency.keySet()) {
Frequency frequency = classFrequency.get(clazz);
if (frequency.count <= 0)
continue;
Stat stat = state.classStats.get(clazz);
if (stat == null) {
stat = new Stat();
state.classStats.put(clazz, stat);
2021-01-06 20:35:11 +00:00
}
2021-01-10 13:31:29 +00:00
int c = (frequency.count - frequency.duplicates);
Integer b = (before == null ? null : frequency.before.get(before));
Integer a = (after == null ? null : frequency.after.get(after));
double f = (c +
(b == null ? 2 * c : 2.0 * b / frequency.count * c) +
(a == null ? 2 * c : 2.0 * a / frequency.count * c)) / 5.0;
//Log.i("Classifier " +
// before + "/" + b + "/" + frequency.before.get(before) + " " +
// after + "/" + a + "/" + frequency.after.get(after) + " " +
// current + "/" + c + "=" + frequency.count + "-" + frequency.duplicates +
// " f=" + f);
stat.totalFrequency += f;
stat.matchedWords++;
if (BuildConfig.DEBUG)
stat.words.add(current + "=" + f);
}
}
2021-01-06 20:35:11 +00:00
2021-01-10 14:26:51 +00:00
private static void updateFrequencies(long account, @NonNull String currentClass, boolean added, @NonNull State state) {
Integer m = classMessages.get(account).get(currentClass);
m = (m == null ? 0 : m) + (added ? 1 : -1);
if (m <= 0)
classMessages.get(account).remove(currentClass);
else
classMessages.get(account).put(currentClass, m);
Log.i("Classifier " + currentClass + "=" + m + " msgs");
2021-01-10 13:31:29 +00:00
for (int i = 1; i < state.words.size() - 1; i++) {
String before = state.words.get(i - 1);
String current = state.words.get(i);
String after = state.words.get(i + 1);
2021-01-06 20:35:11 +00:00
2021-01-10 13:31:29 +00:00
if (current == null)
continue;
2021-01-06 20:35:11 +00:00
2021-01-10 13:31:29 +00:00
Map<String, Frequency> classFrequency = wordClassFrequency.get(account).get(current);
if (added) {
if (classFrequency == null) {
classFrequency = new HashMap<>();
wordClassFrequency.get(account).put(current, classFrequency);
}
2021-01-10 14:26:51 +00:00
Frequency c = classFrequency.get(currentClass);
2021-01-10 13:31:29 +00:00
if (c == null) {
c = new Frequency();
2021-01-10 14:26:51 +00:00
classFrequency.put(currentClass, c);
2021-01-10 13:31:29 +00:00
}
c.add(before, after, 1, state.words.indexOf(current) < i);
} else {
2021-01-10 14:26:51 +00:00
Frequency c = (classFrequency == null ? null : classFrequency.get(currentClass));
2021-01-10 13:31:29 +00:00
if (c != null)
c.add(before, after, -1, state.words.indexOf(current) < i);
2021-01-06 20:35:11 +00:00
}
}
}
2021-01-10 13:31:29 +00:00
static synchronized void save(@NonNull Context context) throws JSONException, IOException {
2021-01-02 21:52:41 +00:00
if (!dirty)
2021-01-02 21:39:30 +00:00
return;
2021-01-02 19:42:35 +00:00
File file = getFile(context);
2021-01-04 14:35:57 +00:00
Helper.writeText(file, toJson().toString(2));
2021-01-02 19:42:35 +00:00
2021-01-03 09:06:08 +00:00
dirty = false;
2021-01-04 14:35:57 +00:00
Log.i("Classifier data saved");
2021-01-02 19:42:35 +00:00
}
2021-01-10 13:31:29 +00:00
private static synchronized void load(@NonNull Context context) throws IOException, JSONException {
2021-01-05 12:42:43 +00:00
if (loaded || dirty)
2021-01-02 19:42:35 +00:00
return;
wordClassFrequency.clear();
File file = getFile(context);
if (file.exists()) {
String json = Helper.readText(file);
2021-01-04 14:35:57 +00:00
fromJson(new JSONObject(json));
2021-01-02 19:42:35 +00:00
}
loaded = true;
2021-01-04 14:35:57 +00:00
Log.i("Classifier data loaded");
2021-01-02 19:42:35 +00:00
}
2021-01-10 16:32:14 +00:00
/**
 * Forgets the message ids of automatically moved messages that are
 * no longer in the database and saves the data when something changed.
 * Any error is logged and not propagated.
 *
 * @param context used for database access and to locate the classifier file
 */
static synchronized void cleanup(@NonNull Context context) {
    try {
        load(context);

        final DB db = DB.getInstance(context);
        for (Map.Entry<Long, List<String>> entry : accountMsgIds.entrySet()) {
            final Long account = entry.getKey();
            final List<String> msgids = entry.getValue();
            Log.i("Classifier cleanup account=" + account + " count=" + msgids.size());

            // Walk over a snapshot because entries are removed from the list on the way
            final List<String> snapshot = new ArrayList<>(msgids);
            for (String msgid : snapshot) {
                final List<EntityMessage> found = db.message().getMessagesByMsgId(account, msgid);
                if (found == null || !found.isEmpty())
                    continue;

                Log.i("Classifier removing msgid=" + msgid);
                msgids.remove(msgid);
                dirty = true;
            }
        }

        if (dirty)
            save(context);
    } catch (Throwable ex) {
        Log.e(ex);
    }
}
2021-01-10 13:31:29 +00:00
static synchronized void clear(@NonNull Context context) {
2021-01-10 17:18:14 +00:00
accountMsgIds.clear();
wordClassFrequency.clear();
dirty = true;
2021-01-05 12:42:43 +00:00
Log.i("Classifier data cleared");
}
2021-01-10 13:31:29 +00:00
static boolean isEnabled(@NonNull Context context) {
2021-01-02 19:42:35 +00:00
SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(context);
return prefs.getBoolean("classification", false);
2021-01-02 19:42:35 +00:00
}
2021-01-10 13:31:29 +00:00
static boolean canClassify(@NonNull String folderType) {
2021-01-03 07:27:27 +00:00
return EntityFolder.INBOX.equals(folderType) ||
EntityFolder.JUNK.equals(folderType) ||
EntityFolder.USER.equals(folderType);
}
2021-01-02 19:42:35 +00:00
2021-01-10 13:31:29 +00:00
static File getFile(@NonNull Context context) {
2021-01-02 19:42:35 +00:00
return new File(context.getFilesDir(), "classifier.json");
}
2021-01-10 13:31:29 +00:00
@NonNull
2021-01-04 14:35:57 +00:00
static JSONObject toJson() throws JSONException {
2021-01-07 18:20:17 +00:00
JSONArray jmessages = new JSONArray();
for (Long account : classMessages.keySet())
for (String clazz : classMessages.get(account).keySet()) {
JSONObject jmessage = new JSONObject();
jmessage.put("account", account);
jmessage.put("class", clazz);
jmessage.put("count", classMessages.get(account).get(clazz));
jmessages.put(jmessage);
}
2021-01-04 14:35:57 +00:00
JSONArray jwords = new JSONArray();
2021-01-07 09:58:56 +00:00
for (Long account : wordClassFrequency.keySet())
2021-01-04 14:35:57 +00:00
for (String word : wordClassFrequency.get(account).keySet()) {
2021-01-06 20:35:11 +00:00
Map<String, Frequency> classFrequency = wordClassFrequency.get(account).get(word);
2021-01-04 14:35:57 +00:00
for (String clazz : classFrequency.keySet()) {
2021-01-06 20:35:11 +00:00
Frequency f = classFrequency.get(clazz);
2021-01-04 14:35:57 +00:00
JSONObject jword = new JSONObject();
jword.put("account", account);
jword.put("word", word);
jword.put("class", clazz);
2021-01-10 13:31:29 +00:00
jword.put("count", f.count);
jword.put("dup", f.duplicates);
2021-01-06 20:35:11 +00:00
jword.put("before", from(f.before));
jword.put("after", from(f.after));
2021-01-04 14:35:57 +00:00
jwords.put(jword);
}
}
2021-01-10 16:02:27 +00:00
JSONArray jclassified = new JSONArray();
for (Long account : accountMsgIds.keySet()) {
JSONObject jaccount = new JSONObject();
jaccount.put("account", account);
jaccount.put("messages", from(accountMsgIds.get(account)));
jclassified.put(jaccount);
}
2021-01-04 14:35:57 +00:00
JSONObject jroot = new JSONObject();
2021-01-10 13:31:29 +00:00
jroot.put("version", 2);
2021-01-07 18:20:17 +00:00
jroot.put("messages", jmessages);
2021-01-04 14:35:57 +00:00
jroot.put("words", jwords);
2021-01-10 16:02:27 +00:00
jroot.put("classified", jclassified);
2021-01-04 14:35:57 +00:00
return jroot;
}
2021-01-10 16:02:27 +00:00
/**
 * Converts a list of strings to a JSON array, keeping the order.
 *
 * @param list the strings to convert
 * @return a JSON array with the same elements
 * @throws JSONException declared for symmetry with the other converters
 */
@NonNull
private static JSONArray from(@NonNull List<String> list) throws JSONException {
    final JSONArray result = new JSONArray();
    final int size = list.size();
    for (int i = 0; i < size; i++)
        result.put(list.get(i));
    return result;
}
2021-01-10 13:31:29 +00:00
@NonNull
private static JSONObject from(@NonNull Map<String, Integer> map) throws JSONException {
JSONObject jmap = new JSONObject();
for (String key : map.keySet())
jmap.put(key, map.get(key));
return jmap;
2021-01-06 20:35:11 +00:00
}
2021-01-10 13:31:29 +00:00
static void fromJson(@NonNull JSONObject jroot) throws JSONException {
2021-01-07 18:20:17 +00:00
int version = jroot.optInt("version");
2021-01-10 13:31:29 +00:00
if (version < 2)
2021-01-07 18:20:17 +00:00
return;
JSONArray jmessages = jroot.getJSONArray("messages");
for (int m = 0; m < jmessages.length(); m++) {
JSONObject jmessage = (JSONObject) jmessages.get(m);
long account = jmessage.getLong("account");
if (!classMessages.containsKey(account))
classMessages.put(account, new HashMap<>());
String clazz = jmessage.getString("class");
int count = jmessage.getInt("count");
classMessages.get(account).put(clazz, count);
}
2021-01-04 14:35:57 +00:00
JSONArray jwords = jroot.getJSONArray("words");
for (int w = 0; w < jwords.length(); w++) {
JSONObject jword = (JSONObject) jwords.get(w);
long account = jword.getLong("account");
if (!wordClassFrequency.containsKey(account))
wordClassFrequency.put(account, new HashMap<>());
2021-01-10 13:31:29 +00:00
if (jword.has("word")) {
String word = jword.getString("word");
Map<String, Frequency> classFrequency = wordClassFrequency.get(account).get(word);
if (classFrequency == null) {
classFrequency = new HashMap<>();
wordClassFrequency.get(account).put(word, classFrequency);
}
Frequency f = new Frequency();
f.count = jword.getInt("count");
f.duplicates = jword.optInt("dup");
if (jword.has("before"))
f.before = from(jword.getJSONObject("before"));
if (jword.has("after"))
f.after = from(jword.getJSONObject("after"));
classFrequency.put(jword.getString("class"), f);
} else
Log.w("No words account=" + account);
2021-01-04 14:35:57 +00:00
}
2021-01-10 16:02:27 +00:00
JSONArray jclassified = jroot.getJSONArray("classified");
for (int a = 0; a < jclassified.length(); a++) {
JSONObject jaccount = jclassified.getJSONObject(a);
long account = jaccount.getLong("account");
List<String> ids = accountMsgIds.get(account);
if (ids == null) {
ids = new ArrayList<>();
accountMsgIds.put(account, ids);
}
JSONArray jids = jaccount.getJSONArray("messages");
for (int h = 0; h < jids.length(); h++)
ids.add(jids.getString(h));
}
2021-01-04 14:35:57 +00:00
}
2021-01-10 13:31:29 +00:00
@NonNull
private static Map<String, Integer> from(@NonNull JSONObject jmap) throws JSONException {
Map<String, Integer> result = new HashMap<>(jmap.length());
Iterator<String> iterator = jmap.keys();
while (iterator.hasNext()) {
String key = iterator.next();
result.put(key, jmap.getInt(key));
}
2021-01-06 20:35:11 +00:00
return result;
}
private static class State {
2021-01-10 13:31:29 +00:00
private final List<String> words = new ArrayList<>();
private final Map<String, Stat> classStats = new HashMap<>();
2021-01-06 20:35:11 +00:00
}
private static class Frequency {
private int count = 0;
2021-01-10 13:31:29 +00:00
private int duplicates = 0;
private Map<String, Integer> before = new HashMap<>();
private Map<String, Integer> after = new HashMap<>();
2021-01-10 13:31:29 +00:00
private void add(String b, String a, int c, boolean duplicate) {
if (count + c < 0)
return;
count += c;
2021-01-10 13:31:29 +00:00
if (duplicate)
duplicates += c;
if (b != null) {
Integer x = before.get(b);
before.put(b, (x == null ? 0 : x) + c);
}
if (a != null) {
Integer x = after.get(a);
after.put(a, (x == null ? 0 : x) + c);
}
}
2021-01-06 20:35:11 +00:00
}
2021-01-02 13:33:53 +00:00
private static class Stat {
2021-01-07 19:11:05 +00:00
private int matchedWords = 0;
private double totalFrequency = 0;
2021-01-10 13:31:29 +00:00
private final List<String> words = new ArrayList<>();
2021-01-02 13:33:53 +00:00
}
private static class Chance {
private String clazz;
private Double chance;
2021-01-02 13:33:53 +00:00
private Chance(String clazz, Double chance) {
2021-01-02 13:33:53 +00:00
this.clazz = clazz;
this.chance = chance;
}
@NotNull
@Override
public String toString() {
2021-01-07 13:55:14 +00:00
return clazz + "=" + Math.round(chance * 100.0 * 100.0) / 100.0 + "%";
2021-01-02 13:33:53 +00:00
}
}
}