Improved html to text conversion

This commit is contained in:
M66B 2019-01-05 11:17:33 +00:00
parent 426098dd41
commit 8ad66eb162
5 changed files with 55 additions and 24 deletions

View File

@ -25,8 +25,6 @@ import android.os.Bundle;
import android.text.TextUtils;
import android.view.MenuItem;
import org.jsoup.Jsoup;
import java.util.ArrayList;
import javax.mail.internet.AddressException;
@ -113,13 +111,17 @@ public class ActivityCompose extends ActivityBilling implements FragmentManager.
if (intent.hasExtra(Intent.EXTRA_SUBJECT)) {
String subject = intent.getStringExtra(Intent.EXTRA_SUBJECT);
if (subject != null)
args.putString("subject", Jsoup.parse(subject).text());
args.putString("subject", subject);
}
if (intent.hasExtra(Intent.EXTRA_TEXT)) {
String body = intent.getStringExtra(Intent.EXTRA_TEXT); // Intent.EXTRA_HTML_TEXT
if (intent.hasExtra(Intent.EXTRA_HTML_TEXT)) {
String html = intent.getStringExtra(Intent.EXTRA_HTML_TEXT);
if (html != null)
args.putString("body", HtmlHelper.getText(html));
} else if (intent.hasExtra(Intent.EXTRA_TEXT)) {
String body = intent.getStringExtra(Intent.EXTRA_TEXT);
if (body != null)
args.putString("body", Jsoup.parse(body).text());
args.putString("body", body);
}
if (intent.hasExtra(Intent.EXTRA_STREAM))

View File

@ -69,7 +69,6 @@ import android.widget.Toast;
import com.google.android.material.bottomnavigation.BottomNavigationView;
import com.google.android.material.snackbar.Snackbar;
import org.jsoup.Jsoup;
import org.xml.sax.XMLReader;
import java.io.IOException;
@ -1324,7 +1323,7 @@ public class AdapterMessage extends RecyclerView.Adapter<AdapterMessage.ViewHold
}
sb.append("\r\n");
sb.append(Jsoup.parse(message.read(context)).text());
sb.append(HtmlHelper.getText(message.read(context)));
return sb.toString();
}

View File

@ -43,12 +43,17 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class HtmlHelper {
// Upper bound, in characters, on the text returned by getPreview.
private static final int PREVIEW_SIZE = 250;
// Matches an http:// or https:// URL up to the next whitespace; the URL is capture group 1.
// The former "[http|https]+" was a character class (any run of h, t, p, s or '|'),
// so it also matched fragments such as "tp://x" inside "ftp://x"; and
// "[\\w\\S(\\.|:|/)]+" was equivalent to "\\S+" because every listed character is non-whitespace.
private static final Pattern pattern = Pattern.compile("(https?://\\S+)");
// Elements for which getText emits a line break BEFORE their content.
private static final List<String> heads = Arrays.asList("p", "h1", "h2", "h3", "h4", "h5", "h6", "tr");
// Elements for which getText emits a line break AFTER their content.
private static final List<String> tails = Arrays.asList("br", "dd", "dt", "p", "h1", "h2", "h3", "h4", "h5", "h6");
static String getBody(String html) {
return Jsoup.parse(html).body().html();
@ -251,6 +256,36 @@ public class HtmlHelper {
/**
 * Builds a short plain-text preview of an HTML message body.
 *
 * @param body HTML source of the message; may be null
 * @return the text Jsoup extracts from the parsed body, cut to at most
 *         PREVIEW_SIZE (250) characters, or null when body is null
 */
static String getPreview(String body) {
// Null in, null out: the parser is only invoked for a non-null body.
String text = (body == null ? null : Jsoup.parse(body).text());
// NOTE(review): the two return lines below look like the before/after versions
// of one statement as rendered by the diff view (literal 250 replaced by the
// PREVIEW_SIZE constant); only one of them can exist in the real source - confirm.
return (text == null ? null : text.substring(0, Math.min(text.length(), 250)));
return (text == null ? null : text.substring(0, Math.min(text.length(), PREVIEW_SIZE)));
}
/**
 * Converts HTML to plain text while keeping a minimum of structure.
 * <p>
 * The parsed document is walked node by node: text nodes are copied,
 * list items are prefixed with a bullet, elements listed in {@code heads}
 * get a line break before their content, elements listed in {@code tails}
 * get a line break after it, and the target of a link is appended in
 * angle brackets after the link text.
 *
 * @param html HTML source to convert; may be null
 * @return the plain-text rendering; an empty string when html is null
 */
static String getText(String html) {
    // getPreview in this class tolerates a null body; do the same here
    // rather than handing null to the parser.
    if (html == null)
        return "";

    final StringBuilder sb = new StringBuilder();

    NodeTraversor.traverse(new NodeVisitor() {
        @Override
        public void head(Node node, int depth) {
            if (node instanceof TextNode) {
                sb.append(((TextNode) node).text());
                return;
            }

            String name = node.nodeName();
            if (name.equals("li"))
                sb.append("\n * ");
            else if (name.equals("dt"))
                sb.append(" ");
            else if (heads.contains(name))
                sb.append("\n");
        }

        @Override
        public void tail(Node node, int depth) {
            String name = node.nodeName();
            if (tails.contains(name))
                sb.append("\n");
            else if (name.equals("a")) {
                // absUrl yields an empty string when there is no href or it
                // cannot be made absolute (the document is parsed without a
                // base URI); skip those instead of emitting a bare " <>".
                String href = node.absUrl("href");
                if (!href.isEmpty())
                    sb.append(" <").append(href).append(">");
            }
        }
    }, Jsoup.parse(html));

    return sb.toString();
}
}

View File

@ -23,8 +23,6 @@ import android.content.Context;
import android.text.TextUtils;
import android.webkit.MimeTypeMap;
import org.jsoup.Jsoup;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
@ -290,7 +288,7 @@ public class MessageHelper {
body.append(identity.signature);
}
String plain = Jsoup.parse(body.toString()).text();
String plain = HtmlHelper.getText(body.toString());
StringBuilder html = new StringBuilder();
html.append("<!DOCTYPE html>").append("\n");

View File

@ -65,7 +65,6 @@ import com.sun.mail.util.MailConnectException;
import org.json.JSONArray;
import org.json.JSONException;
import org.jsoup.Jsoup;
import java.io.IOException;
import java.io.InputStream;
@ -147,7 +146,6 @@ public class ServiceSynchronize extends LifecycleService {
private static final int SYNC_BATCH_SIZE = 20;
private static final int DOWNLOAD_BATCH_SIZE = 20;
private static final long RECONNECT_BACKOFF = 90 * 1000L; // milliseconds
private static final int PREVIEW_SIZE = 250;
private static final int ACCOUNT_ERROR_AFTER = 90; // minutes
private static final int IDENTITY_ERROR_AFTER = 30; // minutes
private static final long STOP_DELAY = 5000L; // milliseconds
@ -645,11 +643,11 @@ public class ServiceSynchronize extends LifecycleService {
if (message.content)
try {
String html = message.read(ServiceSynchronize.this);
String html = message.read(this);
StringBuilder sb = new StringBuilder();
if (!TextUtils.isEmpty(message.subject))
sb.append(message.subject).append("<br>");
sb.append(Jsoup.parse(html).text());
sb.append(HtmlHelper.getPreview(html));
mbuilder.setStyle(new Notification.BigTextStyle().bigText(Html.fromHtml(sb.toString())));
} catch (IOException ex) {
Log.e(ex);
@ -1277,7 +1275,7 @@ public class ServiceSynchronize extends LifecycleService {
};
String id = BuildConfig.APPLICATION_ID + ".POLL." + account.id;
PendingIntent pi = PendingIntent.getBroadcast(ServiceSynchronize.this, 0, new Intent(id), 0);
PendingIntent pi = PendingIntent.getBroadcast(this, 0, new Intent(id), 0);
registerReceiver(alarm, new IntentFilter(id));
// Keep alive
@ -1335,7 +1333,7 @@ public class ServiceSynchronize extends LifecycleService {
Log.e(account.name, ex);
reportError(account, null, ex);
EntityLog.log(ServiceSynchronize.this, account.name + " " + Helper.formatThrowable(ex));
EntityLog.log(this, account.name + " " + Helper.formatThrowable(ex));
db.account().setAccountError(account.id, Helper.formatThrowable(ex));
} finally {
// Stop watching for operations
@ -1351,9 +1349,9 @@ public class ServiceSynchronize extends LifecycleService {
// Close store
try {
EntityLog.log(ServiceSynchronize.this, account.name + " store closing");
EntityLog.log(this, account.name + " store closing");
istore.close();
EntityLog.log(ServiceSynchronize.this, account.name + " store closed");
EntityLog.log(this, account.name + " store closed");
} catch (Throwable ex) {
Log.w(account.name, ex);
} finally {
@ -1389,7 +1387,7 @@ public class ServiceSynchronize extends LifecycleService {
};
String id = BuildConfig.APPLICATION_ID + ".BACKOFF." + account.id;
PendingIntent pi = PendingIntent.getBroadcast(ServiceSynchronize.this, 0, new Intent(id), 0);
PendingIntent pi = PendingIntent.getBroadcast(this, 0, new Intent(id), 0);
registerReceiver(alarm, new IntentFilter(id));
AlarmManager am = (AlarmManager) getSystemService(Context.ALARM_SERVICE);
@ -1817,7 +1815,7 @@ public class ServiceSynchronize extends LifecycleService {
message.uid = null;
db.message().updateMessage(message);
Log.i("Appending sent msgid=" + message.msgid);
EntityOperation.queue(ServiceSynchronize.this, db, message, EntityOperation.ADD); // Could already exist
EntityOperation.queue(this, db, message, EntityOperation.ADD); // Could already exist
}
}
@ -1900,8 +1898,7 @@ public class ServiceSynchronize extends LifecycleService {
MessageHelper helper = new MessageHelper((MimeMessage) imessage);
String html = helper.getHtml();
String text = (html == null ? null : Jsoup.parse(html).text());
String preview = (text == null ? null : text.substring(0, Math.min(text.length(), PREVIEW_SIZE)));
String preview = HtmlHelper.getPreview(html);
message.write(this, html);
db.message().setMessageContent(message.id, true, preview);
}