mirror of
https://github.com/M66B/FairEmail.git
synced 2024-12-27 02:07:12 +00:00
Reduce classifier data size
This commit is contained in:
parent
1e4121d608
commit
c26482ff76
1 changed files with 38 additions and 9 deletions
|
@ -504,7 +504,7 @@ public class MessageClassifier {
|
||||||
if (backup.exists())
|
if (backup.exists())
|
||||||
file = backup;
|
file = backup;
|
||||||
try {
|
try {
|
||||||
_load(context, file);
|
_load(file);
|
||||||
} catch (Throwable ex) {
|
} catch (Throwable ex) {
|
||||||
Log.e(ex);
|
Log.e(ex);
|
||||||
file.delete();
|
file.delete();
|
||||||
|
@ -512,7 +512,7 @@ public class MessageClassifier {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static synchronized void _load(Context context, File file) throws IOException {
|
private static synchronized void _load(File file) throws IOException {
|
||||||
Log.i("Classifier read " + file);
|
Log.i("Classifier read " + file);
|
||||||
long start = new Date().getTime();
|
long start = new Date().getTime();
|
||||||
if (file.exists())
|
if (file.exists())
|
||||||
|
@ -650,13 +650,42 @@ public class MessageClassifier {
|
||||||
dirty = false;
|
dirty = false;
|
||||||
|
|
||||||
long elapsed = new Date().getTime() - start;
|
long elapsed = new Date().getTime() - start;
|
||||||
EntityLog.log(context, "Classifier data loaded elapsed=" + elapsed);
|
Log.i("Classifier data loaded elapsed=" + elapsed);
|
||||||
for (long account : classMessages.keySet())
|
|
||||||
EntityLog.log(context, "Messages account=" + account + " classes=" + classMessages.get(account).size());
|
for (long account : wordClassFrequency.keySet()) {
|
||||||
for (long account : wordClassFrequency.keySet())
|
Map<String, Long> total = new HashMap<>();
|
||||||
EntityLog.log(context, "Words account=" + account + " words=" + wordClassFrequency.get(account).size());
|
Map<String, Integer> count = new HashMap<>();
|
||||||
for (long account : accountMsgIds.keySet())
|
|
||||||
EntityLog.log(context, "Classified account=" + account + " ids=" + accountMsgIds.get(account).size());
|
for (String word : wordClassFrequency.get(account).keySet())
|
||||||
|
for (String clazz : wordClassFrequency.get(account).get(word).keySet()) {
|
||||||
|
int f = wordClassFrequency.get(account).get(word).get(clazz).count;
|
||||||
|
|
||||||
|
if (!total.containsKey(clazz))
|
||||||
|
total.put(clazz, 0L);
|
||||||
|
total.put(clazz, total.get(clazz) + f);
|
||||||
|
|
||||||
|
if (!count.containsKey(clazz))
|
||||||
|
count.put(clazz, 0);
|
||||||
|
count.put(clazz, count.get(clazz) + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (String word : wordClassFrequency.get(account).keySet())
|
||||||
|
for (String clazz : new ArrayList<>(wordClassFrequency.get(account).get(word).keySet())) {
|
||||||
|
int freq = wordClassFrequency.get(account).get(word).get(clazz).count;
|
||||||
|
long avg = total.get(clazz) / count.get(clazz);
|
||||||
|
if (freq < avg / 2) {
|
||||||
|
Log.i("Classifier dropping account=" + account +
|
||||||
|
" word=" + word + " class=" + clazz + " freq=" + freq + " avg=" + avg);
|
||||||
|
wordClassFrequency.get(account).get(word).remove(clazz);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Source 47 MB
|
||||||
|
// avg/1 = 21.3
|
||||||
|
// avg/2 = 25.5
|
||||||
|
// avg/3 = 29.0
|
||||||
|
// avg/5 = 34.6
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static synchronized void cleanup(@NonNull Context context) {
|
static synchronized void cleanup(@NonNull Context context) {
|
||||||
|
|
Loading…
Reference in a new issue