FairEmail/app/src/main/java/eu/faircode/email/Markdown.java

137 lines
5.1 KiB
Java
Raw Normal View History

2024-02-19 15:06:44 +00:00
package eu.faircode.email;
/*
This file is part of FairEmail.
FairEmail is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
FairEmail is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with FairEmail. If not, see <http://www.gnu.org/licenses/>.
Copyright 2018-2024 by Marcel Bokhorst (M66B)
*/
2024-02-20 09:59:17 +00:00
import com.vladsch.flexmark.html2md.converter.FlexmarkHtmlConverter;
2024-02-20 11:35:01 +00:00
import com.vladsch.flexmark.util.data.DataHolder;
import com.vladsch.flexmark.util.data.MutableDataSet;
2024-02-20 09:59:17 +00:00
2024-02-20 07:54:20 +00:00
import org.commonmark.Extension;
import org.commonmark.ext.gfm.strikethrough.StrikethroughExtension;
import org.commonmark.ext.gfm.tables.TablesExtension;
import org.commonmark.ext.ins.InsExtension;
import org.commonmark.ext.task.list.items.TaskListItemsExtension;
2024-02-19 15:06:44 +00:00
import org.commonmark.node.Node;
import org.commonmark.parser.Parser;
import org.commonmark.renderer.html.HtmlRenderer;
2024-02-24 09:18:13 +00:00
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
2024-02-19 15:06:44 +00:00
2024-02-20 07:54:20 +00:00
import java.util.Arrays;
2024-02-24 09:18:13 +00:00
import java.util.HashMap;
2024-02-20 07:54:20 +00:00
import java.util.List;
2024-02-24 09:18:13 +00:00
import java.util.Map;
2024-02-20 07:54:20 +00:00
2024-02-19 15:06:44 +00:00
public class Markdown {
static String toHtml(String markdown) {
2024-02-21 06:55:48 +00:00
// https://github.com/commonmark/commonmark-java#usage
// https://github.com/commonmark/commonmark-java/issues/294
2024-02-20 09:56:33 +00:00
markdown = markdown.replace('\u00a0', ' ');
2024-02-24 09:18:13 +00:00
2024-02-20 07:54:20 +00:00
List<Extension> extensions = Arrays.asList(
InsExtension.create(),
TaskListItemsExtension.create(),
2024-02-20 07:54:20 +00:00
TablesExtension.create(),
StrikethroughExtension.create());
Parser p = Parser.builder()
.extensions(extensions)
.build();
2024-02-19 15:06:44 +00:00
Node d = p.parse(markdown);
2024-02-20 07:54:20 +00:00
HtmlRenderer r = HtmlRenderer.builder()
.extensions(extensions)
.build();
2024-02-23 11:16:29 +00:00
String html = r.render(d);
if (BuildConfig.DEBUG) {
Log.i("Markdown md=" + markdown.replace('\n', '|'));
2024-02-24 09:18:13 +00:00
Log.i("Markdown html=" + html.replace('\n', '|'));
2024-02-23 11:16:29 +00:00
}
return html;
2024-02-19 15:06:44 +00:00
}
2024-02-20 09:56:33 +00:00
static String fromHtml(String html) {
2024-02-20 09:59:17 +00:00
// https://github.com/vsch/flexmark-java/wiki/Extensions#html-to-markdown
2024-02-24 09:18:13 +00:00
Map<String, String> specialCharsMap = new HashMap<>();
//specialCharsMap.put("“", "\"");
//specialCharsMap.put("”", "\"");
specialCharsMap.put("&ldquo;", "\"");
specialCharsMap.put("&rdquo;", "\"");
//specialCharsMap.put("", "'");
//specialCharsMap.put("", "'");
specialCharsMap.put("&lsquo;", "'");
specialCharsMap.put("&rsquo;", "'");
specialCharsMap.put("&apos;", "'");
//specialCharsMap.put("«", "<<");
specialCharsMap.put("&laquo;", "<<");
//specialCharsMap.put("»", ">>");
specialCharsMap.put("&raquo;", ">>");
//specialCharsMap.put("…", "...");
specialCharsMap.put("&hellip;", "...");
//specialCharsMap.put("", "--");
specialCharsMap.put("&endash;", "--");
//specialCharsMap.put("—", "---");
specialCharsMap.put("&emdash;", "---");
2024-02-20 11:35:01 +00:00
DataHolder options = new MutableDataSet()
.set(FlexmarkHtmlConverter.SETEXT_HEADINGS, false)
2024-02-24 09:18:13 +00:00
.set(FlexmarkHtmlConverter.OUTPUT_ATTRIBUTES_ID, false)
.set(FlexmarkHtmlConverter.TYPOGRAPHIC_REPLACEMENT_MAP, specialCharsMap)
2024-02-20 11:35:01 +00:00
.toImmutable();
2024-02-24 09:18:13 +00:00
// Remove nested/empty tables
Document doc = JsoupEx.parse(html);
for (Element table : doc.select("table")) {
boolean empty = false;
Elements children = table.children().select("table");
if (children.size() == 0)
for (Element tr : table.children()) {
if (tr.children().size() == 1) {
empty = true;
break;
}
if (empty)
break;
}
if (children.size() > 0 || empty) {
table.tagName("div");
for (Element child : table.children())
if ("tr".equals(child.tagName()))
child.tagName("div");
else if ("td".equals(child.tagName()))
child.tagName("span");
}
}
2024-02-20 11:35:01 +00:00
String markdown = FlexmarkHtmlConverter.builder(options)
2024-02-20 09:59:17 +00:00
.build()
2024-02-24 09:18:13 +00:00
.convert(doc.html());
2024-02-23 11:16:29 +00:00
if (BuildConfig.DEBUG) {
2024-02-24 09:18:13 +00:00
Log.i("Markdown html=" + html.replace('\n', '|'));
2024-02-23 11:16:29 +00:00
Log.i("Markdown md=" + markdown.replace('\n', '|'));
}
2024-02-24 09:18:13 +00:00
return markdown
.replaceAll("(?m)^( *)(\\d+)\\.( +)", "$1$2\\\\.$3")
2024-02-24 09:02:46 +00:00
.replaceAll("<br />", "")
.replaceAll("\n\n\\s+<!-- -->\n", "");
2024-02-19 15:06:44 +00:00
}
}