FairEmail/app/src/main/java/eu/faircode/email/UriHelper.java

402 lines
14 KiB
Java

package eu.faircode.email;
/*
This file is part of FairEmail.
FairEmail is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
FairEmail is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with FairEmail. If not, see <http://www.gnu.org/licenses/>.
Copyright 2018-2022 by Marcel Bokhorst (M66B)
*/
import android.content.Context;
import android.net.Uri;
import android.text.TextUtils;
import android.util.Base64;
import android.webkit.URLUtil;
import androidx.annotation.NonNull;
import androidx.core.util.PatternsCompat;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
public class UriHelper {
// https://publicsuffix.org/
private static final HashSet<String> suffixList = new HashSet<>();
// https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat
private static final String SUFFIX_LIST_NAME = "public_suffix_list.dat";
// https://github.com/svenjacobs/leon
// https://github.com/newhouse/url-tracking-stripper
// https://maxchadwick.xyz/tracking-query-params-registry/
private static final List<String> PARANOID_QUERY = Collections.unmodifiableList(Arrays.asList(
// https://en.wikipedia.org/wiki/UTM_parameters
"awt_a", // AWeber
"awt_l", // AWeber
"awt_m", // AWeber
"icid", // Adobe
"ef_id", // https://experienceleague.adobe.com/docs/advertising-cloud/integrations/analytics/mc/mc-ids.html
"_ga", // Google Analytics
"gclid", // Google
"gclsrc", // Google ads
"dclid", // DoubleClick (Google)
"fbclid", // Facebook
"igshid", // Instagram
"msclkid", // https://help.ads.microsoft.com/apex/index/3/en/60000
"mc_cid", // MailChimp
"mc_eid", // MailChimp
"zanpid", // Zanox (Awin)
"kclickid" // https://support.freespee.com/hc/en-us/articles/202577831-Kenshoo-integration
));
// https://github.com/snarfed/granary/blob/master/granary/facebook.py#L1789
private static final List<String> FACEBOOK_WHITELIST_PATH = Collections.unmodifiableList(Arrays.asList(
"/nd/", "/n/", "/story.php"
));
private static final List<String> FACEBOOK_WHITELIST_QUERY = Collections.unmodifiableList(Arrays.asList(
"story_fbid", "fbid", "id", "comment_id"
));
static String getParentDomain(Context context, String host) {
if (host == null)
return null;
int dot = host.indexOf('.');
if (dot < 0)
return null;
String parent = host.substring(dot + 1);
String tld = getTld(context, host);
if (tld == null || tld.equals(parent) || parent.length() < tld.length())
return null;
return parent;
}
static String getRootDomain(Context context, String host) {
if (host == null)
return null;
String tld = getTld(context, host);
if (tld == null)
return null;
if (tld.equalsIgnoreCase(host))
return null;
int len = host.length() - tld.length() - 1;
if (len < 0) {
Log.e("getRootDomain host=" + host + " tld=" + tld);
return null;
}
int dot = host.substring(0, len).lastIndexOf('.');
if (dot < 0)
return host;
return host.substring(dot + 1);
}
static boolean isTld(Context context, String host) {
if (host == null)
return false;
String tld = getTld(context, host);
return (tld != null && tld.equals(host));
}
static boolean hasTld(Context context, String host) {
return (getTld(context, host) != null);
}
static String getTld(Context context, @NonNull String host) {
ensureSuffixList(context);
String eval = host.toLowerCase(Locale.ROOT);
while (true) {
int d = eval.indexOf('.');
String w = (d < 0 ? null : '*' + eval.substring(d));
synchronized (suffixList) {
if (suffixList.contains(eval))
return eval;
if (suffixList.contains(w))
if (suffixList.contains('!' + eval))
return eval.substring(d + 1);
else
return eval;
}
int dot = eval.indexOf('.');
if (dot < 0)
return null;
eval = eval.substring(dot + 1);
}
}
static String getEmailUser(String address) {
if (address == null)
return null;
int at = address.indexOf('@');
if (at > 0)
return address.substring(0, at);
return null;
}
static String getEmailDomain(String address) {
if (address == null)
return null;
int at = address.indexOf('@');
if (at > 0)
return address.substring(at + 1);
return null;
}
static @NonNull
Uri guessScheme(@NonNull Uri uri) {
if (uri.getScheme() != null)
return uri;
String url = uri.toString();
if (Helper.EMAIL_ADDRESS.matcher(url).matches())
return Uri.parse("mailto:" + url);
else if (PatternsCompat.IP_ADDRESS.matcher(url).matches())
return Uri.parse("https://" + url);
else if (android.util.Patterns.PHONE.matcher(url).matches())
// Patterns.PHONE (\+[0-9]+[\- \.]*)?(\([0-9]+\)[\- \.]*)?([0-9][0-9\- \.]+[0-9])
// PhoneNumberUtils.isGlobalPhoneNumber() [\+]?[0-9.-]+
return Uri.parse("tel:" + url);
else {
Uri g = Uri.parse(URLUtil.guessUrl(url));
String scheme = g.getScheme();
if (scheme == null)
return uri;
else if ("http".equals(scheme))
scheme = "https";
return Uri.parse(scheme + "://" + url);
}
}
static int getSuffixCount(Context context) {
ensureSuffixList(context);
synchronized (suffixList) {
return suffixList.size();
}
}
private static void ensureSuffixList(Context context) {
synchronized (suffixList) {
if (suffixList.size() > 0)
return;
Log.i("Reading " + SUFFIX_LIST_NAME);
try (InputStream is = context.getAssets().open(SUFFIX_LIST_NAME)) {
BufferedReader br = new BufferedReader(new InputStreamReader((is)));
String line;
while ((line = br.readLine()) != null) {
line = line.trim();
if (TextUtils.isEmpty(line))
continue;
if (line.startsWith("//"))
continue;
suffixList.add(line);
}
Log.i(SUFFIX_LIST_NAME + "=" + suffixList.size());
} catch (Throwable ex) {
Log.e(ex);
}
}
}
static Uri sanitize(Uri uri) {
if (uri.isOpaque())
return uri;
Uri url;
boolean changed = false;
if (uri.getHost() != null &&
uri.getHost().endsWith("safelinks.protection.outlook.com") &&
!TextUtils.isEmpty(uri.getQueryParameter("url"))) {
changed = true;
url = Uri.parse(uri.getQueryParameter("url"));
} else if ("https".equals(uri.getScheme()) &&
"smex-ctp.trendmicro.com".equals(uri.getHost()) &&
"/wis/clicktime/v1/query".equals(uri.getPath()) &&
!TextUtils.isEmpty(uri.getQueryParameter("url"))) {
changed = true;
url = Uri.parse(uri.getQueryParameter("url"));
} else if ("https".equals(uri.getScheme()) &&
"www.google.com".equals(uri.getHost()) &&
uri.getPath() != null &&
uri.getPath().startsWith("/amp/")) {
// https://blog.amp.dev/2017/02/06/whats-in-an-amp-url/
Uri result = null;
String u = uri.toString();
u = u.replace("https://www.google.com/amp/", "");
int p = u.indexOf("/");
while (p > 0) {
String segment = u.substring(0, p);
if (segment.contains(".")) {
result = Uri.parse("https://" + u);
break;
}
u = u.substring(p + 1);
p = u.indexOf("/");
}
changed = (result != null);
url = (result == null ? uri : result);
} else if ("https".equals(uri.getScheme()) &&
uri.getHost() != null &&
uri.getHost().startsWith("www.google.") &&
uri.getQueryParameter("url") != null) {
// Google non-com redirects
Uri result = Uri.parse(uri.getQueryParameter("url"));
changed = (result != null);
url = (result == null ? uri : result);
} else if (uri.getQueryParameterNames().size() == 1) {
// Sophos Email Appliance
Uri result = null;
String key = uri.getQueryParameterNames().iterator().next();
if (TextUtils.isEmpty(uri.getQueryParameter(key)))
try {
String data = new String(Base64.decode(key, Base64.DEFAULT));
int v = data.indexOf("ver=");
int u = data.indexOf("&&url=");
if (v == 0 && u > 0)
result = Uri.parse(URLDecoder.decode(data.substring(u + 6), StandardCharsets.UTF_8.name()));
} catch (Throwable ex) {
Log.w(ex);
}
changed = (result != null);
url = (result == null ? uri : result);
} else if (uri.getQueryParameter("redirectUrl") != null) {
// https://.../link-tracker?redirectUrl=<base64>&sig=...&iat=...&a=...&account=...&email=...&s=...&i=...
try {
byte[] bytes = Base64.decode(uri.getQueryParameter("redirectUrl"), 0);
String u = URLDecoder.decode(new String(bytes), StandardCharsets.UTF_8.name());
Uri result = Uri.parse(u);
changed = (result != null);
url = (result == null ? uri : result);
} catch (Throwable ex) {
Log.i(ex);
url = uri;
}
} else
url = uri;
if (url.isOpaque() || !UriHelper.isHyperLink(url))
return uri;
Uri.Builder builder = url.buildUpon();
builder.clearQuery();
String host = uri.getHost();
String path = uri.getPath();
if (host != null)
host = host.toLowerCase(Locale.ROOT);
if (path != null)
path = path.toLowerCase(Locale.ROOT);
boolean first = "www.facebook.com".equals(host);
for (String key : url.getQueryParameterNames()) {
// https://en.wikipedia.org/wiki/UTM_parameters
// https://docs.oracle.com/en/cloud/saas/marketing/eloqua-user/Help/EloquaAsynchronousTrackingScripts/EloquaTrackingParameters.htm
String lkey = key.toLowerCase(Locale.ROOT);
if (PARANOID_QUERY.contains(lkey) ||
lkey.startsWith("utm_") ||
lkey.startsWith("elq") ||
((host != null && host.endsWith("facebook.com")) &&
!first &&
FACEBOOK_WHITELIST_PATH.contains(path) &&
!FACEBOOK_WHITELIST_QUERY.contains(lkey)) ||
("store.steampowered.com".equals(host) &&
"snr".equals(lkey)))
changed = true;
else if (!TextUtils.isEmpty(key))
for (String value : url.getQueryParameters(key)) {
Log.i("Query " + key + "=" + value);
Uri suri = Uri.parse(value);
if ("http".equals(suri.getScheme()) || "https".equals(suri.getScheme())) {
Uri s = sanitize(suri);
if (s != null) {
changed = true;
value = s.toString();
}
}
builder.appendQueryParameter(key, value);
}
first = false;
}
return (changed ? builder.build() : null);
}
static Uri secure(Uri uri, boolean https) {
String scheme = uri.getScheme();
if (https ? "http".equals(scheme) : "https".equals(scheme)) {
Uri.Builder builder = uri.buildUpon();
builder.scheme(https ? "https" : "http");
String authority = uri.getEncodedAuthority();
if (authority != null) {
authority = authority.replace(https ? ":80" : ":443", https ? ":443" : ":80");
builder.encodedAuthority(authority);
}
return builder.build();
} else
return uri;
}
static boolean isSecure(Uri uri) {
return (!uri.isOpaque() && "https".equals(uri.getScheme()));
}
static boolean isHyperLink(Uri uri) {
return (!uri.isOpaque() &&
("http".equals(uri.getScheme()) || "https".equals(uri.getScheme())));
}
static void test(Context context) {
String[] hosts = new String[]{
"child.parent.example.com", "parent.example.com", "example.com", "com",
"child.parent.co.uk", "parent.co.uk", "co.uk", "uk",
"child.parent.aaa.ck", "parent.aaa.ck", "aaa.ck", "ck",
"child.parent.www.ck", "parent.www.ck", "www.ck", "ck"
};
for (String host : hosts)
Log.i("PSL " + host + ":" +
" tld=" + getTld(context, host) +
" root=" + getRootDomain(context, host) +
" parent=" + getParentDomain(context, host));
}
}