mirror of
https://github.com/M66B/FairEmail.git
synced 2024-12-26 01:36:55 +00:00
Improved html to text
This commit is contained in:
parent
12751b642b
commit
a28cc1a0f9
2 changed files with 60 additions and 38 deletions
|
@ -1667,7 +1667,11 @@ public class AdapterMessage extends RecyclerView.Adapter<AdapterMessage.ViewHold
|
|||
final boolean show_quotes = properties.getValue("quotes", message.id);
|
||||
final boolean show_images = properties.getValue("images", message.id);
|
||||
|
||||
return HtmlHelper.fromHtml(HtmlHelper.sanitize(context, body, show_quotes), new Html.ImageGetter() {
|
||||
String html = HtmlHelper.sanitize(context, body, show_quotes);
|
||||
if (debug)
|
||||
html += "<pre>" + Html.escapeHtml(html) + "</pre>";
|
||||
|
||||
return HtmlHelper.fromHtml(html, new Html.ImageGetter() {
|
||||
@Override
|
||||
public Drawable getDrawable(String source) {
|
||||
Drawable image = HtmlHelper.decodeImage(source, context, message.id, show_images);
|
||||
|
|
|
@ -38,6 +38,7 @@ import org.jsoup.nodes.Document;
|
|||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.nodes.Node;
|
||||
import org.jsoup.nodes.TextNode;
|
||||
import org.jsoup.safety.Cleaner;
|
||||
import org.jsoup.safety.Whitelist;
|
||||
import org.jsoup.select.NodeTraversor;
|
||||
import org.jsoup.select.NodeVisitor;
|
||||
|
@ -100,12 +101,16 @@ public class HtmlHelper {
|
|||
}
|
||||
|
||||
static String sanitize(Context context, String html, boolean showQuotes) {
|
||||
final Document document = Jsoup.parse(Jsoup.clean(html, Whitelist
|
||||
.relaxed()
|
||||
Document parsed = Jsoup.parse(html);
|
||||
Whitelist whitelist = Whitelist.relaxed()
|
||||
.addTags("hr")
|
||||
.removeTags("col", "colgroup", "thead", "tbody")
|
||||
.removeAttributes("table", "width")
|
||||
.removeAttributes("td", "colspan", "rowspan", "width")
|
||||
.removeAttributes("th", "colspan", "rowspan", "width")
|
||||
.addProtocols("img", "src", "cid")
|
||||
.addProtocols("img", "src", "data")));
|
||||
.addProtocols("img", "src", "data");
|
||||
final Document document = new Cleaner(whitelist).clean(parsed);
|
||||
|
||||
// Quotes
|
||||
if (!showQuotes)
|
||||
|
@ -115,12 +120,12 @@ public class HtmlHelper {
|
|||
// Tables
|
||||
for (Element col : document.select("th,td")) {
|
||||
// prevent line breaks
|
||||
col.select("br").tagName("span").html(" ");
|
||||
col.select("br").tagName("span").html(" ");
|
||||
col.select("div").tagName("span");
|
||||
|
||||
// separate columns by a space
|
||||
if (col.nextElementSibling() != null)
|
||||
col.append(" ");
|
||||
col.append(" ");
|
||||
|
||||
if ("th".equals(col.tagName()))
|
||||
col.tagName("strong");
|
||||
|
@ -180,6 +185,7 @@ public class HtmlHelper {
|
|||
for (Element img : document.select("img")) {
|
||||
String src = img.attr("src");
|
||||
String alt = img.attr("alt");
|
||||
String title = img.attr("title");
|
||||
String height = img.attr("height").trim();
|
||||
String width = img.attr("width").trim();
|
||||
|
||||
|
@ -210,6 +216,10 @@ public class HtmlHelper {
|
|||
div.appendElement("br");
|
||||
div.appendElement("em").text(alt);
|
||||
}
|
||||
if (!TextUtils.isEmpty(title)) {
|
||||
div.appendElement("br");
|
||||
div.appendElement("em").text(title);
|
||||
}
|
||||
|
||||
// Tracking image
|
||||
if ("1".equals(height) && "1".equals(width) && !TextUtils.isEmpty(src)) {
|
||||
|
@ -226,51 +236,59 @@ public class HtmlHelper {
|
|||
public void head(Node node, int depth) {
|
||||
if (node instanceof TextNode) {
|
||||
TextNode tnode = (TextNode) node;
|
||||
Element span = document.createElement("span");
|
||||
|
||||
int pos = 0;
|
||||
String text = tnode.text();
|
||||
Matcher matcher = PatternsCompat.WEB_URL.matcher(text);
|
||||
while (matcher.find()) {
|
||||
boolean linked = false;
|
||||
Node parent = tnode.parent();
|
||||
while (parent != null) {
|
||||
if ("a".equals(parent.nodeName())) {
|
||||
linked = true;
|
||||
break;
|
||||
if (matcher.matches()) {
|
||||
Element span = document.createElement("span");
|
||||
|
||||
int pos = 0;
|
||||
while (matcher.find()) {
|
||||
boolean linked = false;
|
||||
Node parent = tnode.parent();
|
||||
while (parent != null) {
|
||||
if ("a".equals(parent.nodeName())) {
|
||||
linked = true;
|
||||
break;
|
||||
}
|
||||
parent = parent.parent();
|
||||
}
|
||||
parent = parent.parent();
|
||||
|
||||
String scheme = Uri.parse(matcher.group()).getScheme();
|
||||
|
||||
if (BuildConfig.DEBUG)
|
||||
Log.i("Web url=" + matcher.group() + " linked=" + linked + " scheme=" + scheme);
|
||||
|
||||
if (linked || scheme == null)
|
||||
span.appendText(text.substring(pos, matcher.end()));
|
||||
else {
|
||||
span.appendText(text.substring(pos, matcher.start()));
|
||||
|
||||
Element a = document.createElement("a");
|
||||
a.attr("href", matcher.group());
|
||||
a.text(matcher.group());
|
||||
span.appendChild(a);
|
||||
}
|
||||
|
||||
pos = matcher.end();
|
||||
}
|
||||
span.appendText(text.substring(pos));
|
||||
|
||||
String scheme = Uri.parse(matcher.group()).getScheme();
|
||||
|
||||
if (BuildConfig.DEBUG)
|
||||
Log.i("Web url=" + matcher.group() + " linked=" + linked + " scheme=" + scheme);
|
||||
|
||||
if (linked || scheme == null)
|
||||
span.appendText(text.substring(pos, matcher.end()));
|
||||
else {
|
||||
span.appendText(text.substring(pos, matcher.start()));
|
||||
|
||||
Element a = document.createElement("a");
|
||||
a.attr("href", matcher.group());
|
||||
a.text(matcher.group());
|
||||
span.appendChild(a);
|
||||
}
|
||||
|
||||
pos = matcher.end();
|
||||
tnode.before(span);
|
||||
tnode.text("");
|
||||
}
|
||||
span.appendText(text.substring(pos));
|
||||
|
||||
tnode.before(span);
|
||||
tnode.text("");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tail(Node node, int depth) {
|
||||
}
|
||||
}, document.body());
|
||||
}, document);
|
||||
|
||||
// Remove block elements displaying nothing
|
||||
for (Element e : document.select("*"))
|
||||
if (e.isBlock() && !e.hasText() && e.select("img").size() == 0)
|
||||
e.remove();
|
||||
|
||||
return document.body().html();
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue