From 5922d3010429a5fad7462e7744d3d3555964c526 Mon Sep 17 00:00:00 2001 From: M66B Date: Sun, 27 Sep 2020 17:01:59 +0200 Subject: [PATCH] Spam filter experiment --- app/build.gradle | 3 + app/src/main/java/eu/faircode/email/Core.java | 6 ++ .../java/eu/faircode/email/JunkFilter.java | 76 +++++++++++++++++++ .../eu/faircode/email/ServiceSynchronize.java | 4 + 4 files changed, 89 insertions(+) create mode 100644 app/src/main/java/eu/faircode/email/JunkFilter.java diff --git a/app/build.gradle b/app/build.gradle index b726c25cfd..e9584bb8ce 100644 --- a/app/build.gradle +++ b/app/build.gradle @@ -389,4 +389,7 @@ dependencies { // http://www.freeutils.net/source/jcharset/ // https://mvnrepository.com/artifact/net.freeutils/jcharset implementation "net.freeutils:jcharset:$jcharset_version" + + // https://github.com/ptnplanet/Java-Naive-Bayes-Classifier + implementation "com.github.ptnplanet:Java-Naive-Bayes-Classifier:1.0.7" } diff --git a/app/src/main/java/eu/faircode/email/Core.java b/app/src/main/java/eu/faircode/email/Core.java index 98b2c67790..1f43c22d3c 100644 --- a/app/src/main/java/eu/faircode/email/Core.java +++ b/app/src/main/java/eu/faircode/email/Core.java @@ -1419,6 +1419,7 @@ class Core { parts.isPlainOnly(), HtmlHelper.getPreview(body), parts.getWarnings(message.warning)); + JunkFilter.classify(context, body, folder.type); if (body != null) EntityLog.log(context, "Operation body size=" + body.length()); @@ -2073,6 +2074,7 @@ class Core { parts.isPlainOnly(), HtmlHelper.getPreview(body), parts.getWarnings(message.warning)); + JunkFilter.classify(context, body, folder.type); for (EntityAttachment attachment : parts.getAttachments()) parts.downloadAttachment(context, attachment); @@ -2867,6 +2869,8 @@ class Core { parts.isPlainOnly(), HtmlHelper.getPreview(body), parts.getWarnings(message.warning)); + JunkFilter.classify(context, body, folder.type); + if (stats != null && body != null) stats.content += body.length(); Log.i(folder.name + " inline downloaded message id=" + message.id + @@ -3261,6 +3265,8 @@ class Core { parts.isPlainOnly(), HtmlHelper.getPreview(body), parts.getWarnings(message.warning)); + JunkFilter.classify(context, body, folder.type); + if (stats != null && body != null) stats.content += body.length(); Log.i(folder.name + " downloaded message id=" + message.id + diff --git a/app/src/main/java/eu/faircode/email/JunkFilter.java b/app/src/main/java/eu/faircode/email/JunkFilter.java new file mode 100644 index 0000000000..f11b38ed5e --- /dev/null +++ b/app/src/main/java/eu/faircode/email/JunkFilter.java @@ -0,0 +1,76 @@ +package eu.faircode.email; + +import android.content.Context; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ExecutorService; + +import de.daslaboratorium.machinelearning.classifier.Classification; +import de.daslaboratorium.machinelearning.classifier.Classifier; +import de.daslaboratorium.machinelearning.classifier.bayes.BayesClassifier; + +public class JunkFilter { + private static final Classifier bayes = new BayesClassifier<>(); + + private static final ExecutorService executor = + Helper.getBackgroundExecutor(1, "junk"); + + static void classify(Context context, String html, String folderType) { + if (EntityFolder.isOutgoing(folderType) || + EntityFolder.ARCHIVE.equals(folderType)) + return; + + final boolean junk = EntityFolder.JUNK.equals(folderType); + + executor.submit(new Runnable() { + @Override + public void run() { + try { + String text = HtmlHelper.getText(context, html); + List words = Arrays.asList(text.split("[^\\p{L}\\p{N}'`]+")); + + Classification classification = bayes.classify(words); + Log.i("MMM folder=" + folderType + " category=" + (classification == null ? null : classification.getCategory())); + + bayes.learn(junk ? "junk" : "ham", words); + } catch (Throwable ex) { + Log.e(ex); + } + } + }); + } + + static void save(Context context) { + final File file = new File(context.getFilesDir(), "junk.filter"); + + executor.submit(new Runnable() { + @Override + public void run() { + try (FileOutputStream fos = new FileOutputStream(file)) { + } catch (Throwable ex) { + Log.e(ex); + } + } + }); + } + + static void load(Context context) { + final File file = new File(context.getFilesDir(), "junk.filter"); + + executor.submit(new Runnable() { + @Override + public void run() { + if (!file.exists()) + return; + try (FileInputStream fis = new FileInputStream(file)) { + } catch (Throwable ex) { + Log.e(ex); + } + } + }); + } +} diff --git a/app/src/main/java/eu/faircode/email/ServiceSynchronize.java b/app/src/main/java/eu/faircode/email/ServiceSynchronize.java index ac172bd589..583cfc8be1 100644 --- a/app/src/main/java/eu/faircode/email/ServiceSynchronize.java +++ b/app/src/main/java/eu/faircode/email/ServiceSynchronize.java @@ -617,6 +617,8 @@ public class ServiceSynchronize extends ServiceBase implements SharedPreferences }); prefs.registerOnSharedPreferenceChangeListener(this); + + JunkFilter.load(this); } @Override @@ -686,6 +688,8 @@ public class ServiceSynchronize extends ServiceBase implements SharedPreferences NotificationManager nm = (NotificationManager) getSystemService(Context.NOTIFICATION_SERVICE); nm.cancel(Helper.NOTIFICATION_SYNCHRONIZE); + JunkFilter.save(this); + super.onDestroy(); }