Spam filter experiment

This commit is contained in:
M66B 2020-09-27 17:01:59 +02:00
parent e5bfbce6b0
commit 5922d30104
4 changed files with 89 additions and 0 deletions

View File

@ -389,4 +389,7 @@ dependencies {
// http://www.freeutils.net/source/jcharset/
// https://mvnrepository.com/artifact/net.freeutils/jcharset
implementation "net.freeutils:jcharset:$jcharset_version"
// https://github.com/ptnplanet/Java-Naive-Bayes-Classifier
implementation "com.github.ptnplanet:Java-Naive-Bayes-Classifier:1.0.7"
}

View File

@ -1419,6 +1419,7 @@ class Core {
parts.isPlainOnly(),
HtmlHelper.getPreview(body),
parts.getWarnings(message.warning));
JunkFilter.classify(context, body, folder.type);
if (body != null)
EntityLog.log(context, "Operation body size=" + body.length());
@ -2073,6 +2074,7 @@ class Core {
parts.isPlainOnly(),
HtmlHelper.getPreview(body),
parts.getWarnings(message.warning));
JunkFilter.classify(context, body, folder.type);
for (EntityAttachment attachment : parts.getAttachments())
parts.downloadAttachment(context, attachment);
@ -2867,6 +2869,8 @@ class Core {
parts.isPlainOnly(),
HtmlHelper.getPreview(body),
parts.getWarnings(message.warning));
JunkFilter.classify(context, body, folder.type);
if (stats != null && body != null)
stats.content += body.length();
Log.i(folder.name + " inline downloaded message id=" + message.id +
@ -3261,6 +3265,8 @@ class Core {
parts.isPlainOnly(),
HtmlHelper.getPreview(body),
parts.getWarnings(message.warning));
JunkFilter.classify(context, body, folder.type);
if (stats != null && body != null)
stats.content += body.length();
Log.i(folder.name + " downloaded message id=" + message.id +

View File

@ -0,0 +1,76 @@
package eu.faircode.email;
import android.content.Context;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutorService;
import de.daslaboratorium.machinelearning.classifier.Classification;
import de.daslaboratorium.machinelearning.classifier.Classifier;
import de.daslaboratorium.machinelearning.classifier.bayes.BayesClassifier;
/**
 * Experimental junk (spam) filter built on a naive Bayes classifier.
 * <p>
 * Message bodies are tokenized into words, classified, and then used as
 * training data labelled "junk" or "ham" according to the folder type.
 * All work is submitted to a dedicated background executor.
 */
public class JunkFilter {
    // Shared classifier instance; only touched from tasks submitted to the executor below
    private static final Classifier<String, String> bayes = new BayesClassifier<>();

    // NOTE(review): presumably a single-threaded executor (first argument is 1) - confirm in Helper
    private static final ExecutorService executor =
            Helper.getBackgroundExecutor(1, "junk");

    /**
     * Classifies the given message body and then learns from it.
     * <p>
     * Nothing is done when there is no body, or when the folder is an
     * outgoing folder or the archive folder. Messages in the junk folder
     * are learned as "junk", all other messages as "ham".
     *
     * @param context    context passed on to {@code HtmlHelper.getText}
     * @param html       the message body, may be {@code null}
     * @param folderType the type of the folder the message is in
     */
    static void classify(Context context, String html, String folderType) {
        // Callers check the body for null only after calling this method
        if (html == null)
            return;

        if (EntityFolder.isOutgoing(folderType) ||
                EntityFolder.ARCHIVE.equals(folderType))
            return;

        final boolean junk = EntityFolder.JUNK.equals(folderType);

        executor.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    String text = HtmlHelper.getText(context, html);
                    if (text == null)
                        return;

                    // Split on everything that is not a letter, digit, apostrophe or backtick
                    String[] tokens = text.split("[^\\p{L}\\p{N}'`]+");

                    // String.split yields a leading empty string when the text starts with
                    // a delimiter (and a single empty string for empty text); trailing
                    // empty strings are already removed by split
                    int start = (tokens.length > 0 && tokens[0].isEmpty() ? 1 : 0);
                    List<String> words = Arrays.asList(tokens).subList(start, tokens.length);

                    // Nothing to classify or learn from
                    if (words.isEmpty())
                        return;

                    // Classify first, so the result is not influenced by learning this message
                    Classification<String, String> classification = bayes.classify(words);
                    Log.i("MMM folder=" + folderType + " category=" + (classification == null ? null : classification.getCategory()));

                    bayes.learn(junk ? "junk" : "ham", words);
                } catch (Throwable ex) {
                    Log.e(ex);
                }
            }
        });
    }

    /**
     * Placeholder for persisting the classifier state to the file
     * "junk.filter" in the app's files directory.
     * <p>
     * The file is opened for writing, but nothing is written yet; note that
     * opening a {@link FileOutputStream} creates or truncates the file.
     *
     * @param context context used to locate the files directory
     */
    static void save(Context context) {
        final File file = new File(context.getFilesDir(), "junk.filter");

        executor.submit(new Runnable() {
            @Override
            public void run() {
                try (FileOutputStream fos = new FileOutputStream(file)) {
                    // TODO: serialize the classifier state
                } catch (Throwable ex) {
                    Log.e(ex);
                }
            }
        });
    }

    /**
     * Placeholder for restoring the classifier state from the file
     * "junk.filter" in the app's files directory.
     * <p>
     * The file is opened when it exists, but nothing is read yet.
     *
     * @param context context used to locate the files directory
     */
    static void load(Context context) {
        final File file = new File(context.getFilesDir(), "junk.filter");

        executor.submit(new Runnable() {
            @Override
            public void run() {
                if (!file.exists())
                    return;

                try (FileInputStream fis = new FileInputStream(file)) {
                    // TODO: deserialize the classifier state
                } catch (Throwable ex) {
                    Log.e(ex);
                }
            }
        });
    }
}

View File

@ -617,6 +617,8 @@ public class ServiceSynchronize extends ServiceBase implements SharedPreferences
});
prefs.registerOnSharedPreferenceChangeListener(this);
JunkFilter.load(this);
}
@Override
@ -686,6 +688,8 @@ public class ServiceSynchronize extends ServiceBase implements SharedPreferences
NotificationManager nm = (NotificationManager) getSystemService(Context.NOTIFICATION_SERVICE);
nm.cancel(Helper.NOTIFICATION_SYNCHRONIZE);
JunkFilter.save(this);
super.onDestroy();
}