FairEmail/app/src/main/java/eu/faircode/email/UriHelper.java

343 lines
12 KiB
Java
Raw Normal View History

package eu.faircode.email;
/*
This file is part of FairEmail.
FairEmail is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
FairEmail is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with FairEmail. If not, see <http://www.gnu.org/licenses/>.

Copyright 2018-2022 by Marcel Bokhorst (M66B)
*/
2021-07-20 20:16:20 +00:00
import android.content.Context;
2021-09-02 18:28:27 +00:00
import android.net.Uri;
2021-07-20 20:16:20 +00:00
import android.text.TextUtils;
2021-09-12 11:18:16 +00:00
import android.util.Base64;
2021-09-02 18:28:27 +00:00
import android.webkit.URLUtil;
2021-07-20 20:16:20 +00:00
2021-08-17 08:39:27 +00:00
import androidx.annotation.NonNull;
2021-09-09 06:23:55 +00:00
import androidx.core.util.PatternsCompat;
2021-08-17 08:39:27 +00:00
2021-07-20 20:16:20 +00:00
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
2021-09-12 11:18:16 +00:00
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
2021-07-20 20:16:20 +00:00
import java.util.HashSet;
2021-09-12 11:18:16 +00:00
import java.util.List;
import java.util.Locale;
2021-07-20 20:16:20 +00:00
public class UriHelper {
2021-07-20 20:16:20 +00:00
// https://publicsuffix.org/
// In-memory copy of the public suffix rules: one entry per non-empty, non-comment
// line of the asset named by SUFFIX_LIST_NAME, filled lazily by ensureSuffixList().
// The set object is also used as the monitor for every read and write of its contents.
private static final HashSet<String> suffixList = new HashSet<>();
2021-08-07 07:26:44 +00:00
// https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat
// Name of the asset that ensureSuffixList() opens through context.getAssets().
private static final String SUFFIX_LIST_NAME = "public_suffix_list.dat";
2021-07-20 20:16:20 +00:00
// https://github.com/svenjacobs/leon
2021-09-12 11:18:16 +00:00
// https://github.com/newhouse/url-tracking-stripper
// https://maxchadwick.xyz/tracking-query-params-registry/
2021-09-12 11:18:16 +00:00
private static final List<String> PARANOID_QUERY = Collections.unmodifiableList(Arrays.asList(
// https://en.wikipedia.org/wiki/UTM_parameters
"awt_a", // AWeber
"awt_l", // AWeber
"awt_m", // AWeber
"icid", // Adobe
"ef_id", // https://experienceleague.adobe.com/docs/advertising-cloud/integrations/analytics/mc/mc-ids.html
"_ga", // Google Analytics
2021-09-12 11:18:16 +00:00
"gclid", // Google
"gclsrc", // Google ads
"dclid", // DoubleClick (Google)
"fbclid", // Facebook
"igshid", // Instagram
"msclkid", // https://help.ads.microsoft.com/apex/index/3/en/60000
2021-09-12 11:18:16 +00:00
"mc_cid", // MailChimp
"mc_eid", // MailChimp
"zanpid", // Zanox (Awin)
"kclickid" // https://support.freespee.com/hc/en-us/articles/202577831-Kenshoo-integration
));
// https://github.com/snarfed/granary/blob/master/granary/facebook.py#L1789
// Lower-cased paths for which sanitize() applies the Facebook query filter;
// the comparison is an exact match against the whole lower-cased path.
private static final List<String> FACEBOOK_WHITELIST_PATH = Collections.unmodifiableList(Arrays.asList(
"/nd/", "/n/", "/story.php"
));
// Lower-cased query parameter names that sanitize() keeps when the Facebook
// query filter applies; other parameters are dropped by that filter.
private static final List<String> FACEBOOK_WHITELIST_QUERY = Collections.unmodifiableList(Arrays.asList(
"story_fbid", "fbid", "id", "comment_id"
));
2021-07-20 20:16:20 +00:00
/**
 * Returns the parent domain of a host name according to the suffix list.
 *
 * @param context used to load the suffix list on first use
 * @param host    host name, may be {@code null}
 * @return {@code null} when {@code host} is {@code null}; otherwise the parent
 *         domain found by the suffix lookup, or {@code host} itself when the
 *         lookup finds nothing
 */
static String getParentDomain(Context context, String host) {
    if (host == null) {
        return null;
    }

    String found = _getSuffix(context, host);
    if (found != null) {
        return found;
    }
    return host;
}
/**
 * Tells whether the suffix lookup finds a parent domain for a host name.
 *
 * @param context used to load the suffix list on first use
 * @param host    host name, may be {@code null}
 * @return {@code true} only when {@code host} is not {@code null} and the
 *         lookup returns a parent domain
 */
static boolean hasParentDomain(Context context, String host) {
    if (host == null) {
        return false;
    }
    return _getSuffix(context, host) != null;
}
2021-08-17 08:39:27 +00:00
private static String _getSuffix(Context context, @NonNull String host) {
2021-07-20 20:16:20 +00:00
ensureSuffixList(context);
String h = host.toLowerCase(Locale.ROOT);
2021-07-20 20:16:20 +00:00
while (true) {
int dot = h.indexOf('.');
if (dot < 0)
return null;
2021-07-20 20:16:20 +00:00
String prefix = h.substring(0, dot);
h = h.substring(dot + 1);
2021-07-20 20:16:20 +00:00
int d = h.indexOf('.');
String w = (d < 0 ? null : '*' + h.substring(d));
synchronized (suffixList) {
if (!suffixList.contains('!' + h) &&
(suffixList.contains(h) || suffixList.contains(w))) {
2021-07-20 20:16:20 +00:00
String parent = prefix + "." + h;
2021-08-11 13:42:07 +00:00
Log.d("Host=" + host + " parent=" + parent);
2021-07-20 20:16:20 +00:00
return parent;
}
}
}
}
2021-07-02 07:51:56 +00:00
/**
 * Returns the part of an email address before the first {@code '@'}.
 *
 * @param address email address, may be {@code null}
 * @return the text before the first {@code '@'}, or {@code null} when the
 *         address is {@code null}, has no {@code '@'} or starts with it
 */
static String getEmailUser(String address) {
    int at = (address == null ? -1 : address.indexOf('@'));
    return (at > 0 ? address.substring(0, at) : null);
}
static String getEmailDomain(String address) {
if (address == null)
return null;
int at = address.indexOf('@');
if (at > 0)
return address.substring(at + 1);
2021-07-02 07:51:56 +00:00
return null;
}
2021-07-20 20:16:20 +00:00
2021-09-02 18:28:27 +00:00
static @NonNull
Uri guessScheme(@NonNull Uri uri) {
if (uri.getScheme() != null)
return uri;
String url = uri.toString();
if (Helper.EMAIL_ADDRESS.matcher(url).matches())
return Uri.parse("mailto:" + url);
2021-09-09 06:23:55 +00:00
else if (PatternsCompat.IP_ADDRESS.matcher(url).matches())
return Uri.parse("https://" + url);
2021-09-02 18:28:27 +00:00
else if (android.util.Patterns.PHONE.matcher(url).matches())
2021-09-09 06:23:55 +00:00
// Patterns.PHONE (\+[0-9]+[\- \.]*)?(\([0-9]+\)[\- \.]*)?([0-9][0-9\- \.]+[0-9])
// PhoneNumberUtils.isGlobalPhoneNumber() [\+]?[0-9.-]+
2021-09-02 18:28:27 +00:00
return Uri.parse("tel:" + url);
else {
Uri g = Uri.parse(URLUtil.guessUrl(url));
String scheme = g.getScheme();
if (scheme == null)
return uri;
else if ("http".equals(scheme))
scheme = "https";
return Uri.parse(scheme + "://" + url);
}
}
2021-09-28 18:07:17 +00:00
/**
 * Returns the number of entries in the suffix list, loading it first if needed.
 *
 * @param context used to load the suffix list on first use
 * @return number of entries currently held in the suffix list
 */
static int getSuffixCount(Context context) {
    ensureSuffixList(context);

    int count;
    synchronized (suffixList) {
        count = suffixList.size();
    }
    return count;
}
private static void ensureSuffixList(Context context) {
2021-07-20 20:16:20 +00:00
synchronized (suffixList) {
if (suffixList.size() > 0)
return;
Log.i("Reading " + SUFFIX_LIST_NAME);
try (InputStream is = context.getAssets().open(SUFFIX_LIST_NAME)) {
BufferedReader br = new BufferedReader(new InputStreamReader((is)));
String line;
while ((line = br.readLine()) != null) {
line = line.trim();
if (TextUtils.isEmpty(line))
continue;
if (line.startsWith("//"))
continue;
suffixList.add(line);
}
Log.i(SUFFIX_LIST_NAME + "=" + suffixList.size());
} catch (Throwable ex) {
Log.e(ex);
}
}
}
2021-09-12 11:18:16 +00:00
static Uri sanitize(Uri uri) {
2021-09-12 11:37:10 +00:00
if (uri.isOpaque())
return uri;
2021-09-12 11:18:16 +00:00
Uri url;
2021-09-12 11:37:10 +00:00
boolean changed = false;
2021-09-12 11:18:16 +00:00
if (uri.getHost() != null &&
uri.getHost().endsWith("safelinks.protection.outlook.com") &&
!TextUtils.isEmpty(uri.getQueryParameter("url"))) {
changed = true;
url = Uri.parse(uri.getQueryParameter("url"));
} else if ("https".equals(uri.getScheme()) &&
"smex-ctp.trendmicro.com".equals(uri.getHost()) &&
"/wis/clicktime/v1/query".equals(uri.getPath()) &&
!TextUtils.isEmpty(uri.getQueryParameter("url"))) {
changed = true;
url = Uri.parse(uri.getQueryParameter("url"));
} else if ("https".equals(uri.getScheme()) &&
"www.google.com".equals(uri.getHost()) &&
uri.getPath() != null &&
uri.getPath().startsWith("/amp/")) {
// https://blog.amp.dev/2017/02/06/whats-in-an-amp-url/
Uri result = null;
String u = uri.toString();
u = u.replace("https://www.google.com/amp/", "");
int p = u.indexOf("/");
while (p > 0) {
String segment = u.substring(0, p);
if (segment.contains(".")) {
result = Uri.parse("https://" + u);
break;
}
u = u.substring(p + 1);
p = u.indexOf("/");
}
2022-01-04 18:03:21 +00:00
changed = (result != null);
url = (result == null ? uri : result);
} else if ("https".equals(uri.getScheme()) &&
uri.getHost() != null &&
uri.getHost().startsWith("www.google.") &&
uri.getQueryParameter("url") != null) {
// Google non-com redirects
Uri result = Uri.parse(uri.getQueryParameter("url"));
2021-09-12 11:18:16 +00:00
changed = (result != null);
url = (result == null ? uri : result);
} else if (uri.getQueryParameterNames().size() == 1) {
// Sophos Email Appliance
Uri result = null;
2021-09-12 12:39:21 +00:00
2021-09-12 11:18:16 +00:00
String key = uri.getQueryParameterNames().iterator().next();
if (TextUtils.isEmpty(uri.getQueryParameter(key)))
try {
String data = new String(Base64.decode(key, Base64.DEFAULT));
2021-09-12 12:39:21 +00:00
int v = data.indexOf("ver=");
2021-09-12 11:18:16 +00:00
int u = data.indexOf("&&url=");
2021-09-12 12:39:21 +00:00
if (v == 0 && u > 0)
2021-09-12 11:18:16 +00:00
result = Uri.parse(URLDecoder.decode(data.substring(u + 6), StandardCharsets.UTF_8.name()));
} catch (Throwable ex) {
Log.w(ex);
}
changed = (result != null);
url = (result == null ? uri : result);
} else
url = uri;
if (url.isOpaque())
return uri;
2021-09-12 11:37:10 +00:00
Uri.Builder builder = url.buildUpon();
2021-09-12 11:18:16 +00:00
builder.clearQuery();
String host = uri.getHost();
String path = uri.getPath();
if (host != null)
host = host.toLowerCase(Locale.ROOT);
if (path != null)
path = path.toLowerCase(Locale.ROOT);
boolean first = "www.facebook.com".equals(host);
for (String key : url.getQueryParameterNames()) {
// https://en.wikipedia.org/wiki/UTM_parameters
// https://docs.oracle.com/en/cloud/saas/marketing/eloqua-user/Help/EloquaAsynchronousTrackingScripts/EloquaTrackingParameters.htm
String lkey = key.toLowerCase(Locale.ROOT);
if (PARANOID_QUERY.contains(lkey) ||
lkey.startsWith("utm_") ||
lkey.startsWith("elq") ||
((host != null && host.endsWith("facebook.com")) &&
!first &&
FACEBOOK_WHITELIST_PATH.contains(path) &&
!FACEBOOK_WHITELIST_QUERY.contains(lkey)) ||
("store.steampowered.com".equals(host) &&
"snr".equals(lkey)))
changed = true;
else if (!TextUtils.isEmpty(key))
for (String value : url.getQueryParameters(key)) {
Log.i("Query " + key + "=" + value);
Uri suri = Uri.parse(value);
if ("http".equals(suri.getScheme()) || "https".equals(suri.getScheme())) {
Uri s = sanitize(suri);
if (s != null) {
changed = true;
value = s.toString();
}
}
builder.appendQueryParameter(key, value);
}
first = false;
}
return (changed ? builder.build() : null);
}
/**
 * Switches a link between {@code http} and {@code https}.
 * <p>
 * When the scheme is switched and the authority ends with the default port of
 * the old scheme ({@code :80} for http, {@code :443} for https), that port is
 * replaced by the default port of the new scheme. Only a trailing port is
 * rewritten, so ports such as {@code :8080}, user info and IPv6 literals that
 * merely contain {@code :80} or {@code :443} are left untouched.
 *
 * @param uri   link to convert
 * @param https {@code true} to upgrade http to https, {@code false} to
 *              downgrade https to http
 * @return the converted link, or {@code uri} itself when its scheme is not the
 *         one being converted from
 */
static Uri secure(Uri uri, boolean https) {
    String scheme = uri.getScheme();
    if (https ? "http".equals(scheme) : "https".equals(scheme)) {
        Uri.Builder builder = uri.buildUpon();
        builder.scheme(https ? "https" : "http");

        String authority = uri.getEncodedAuthority();
        if (authority != null) {
            String oldPort = (https ? ":80" : ":443");
            String newPort = (https ? ":443" : ":80");
            // Match the port suffix only; a plain replace() would turn ":8080" into ":44380"
            if (authority.endsWith(oldPort))
                authority = authority.substring(0, authority.length() - oldPort.length()) + newPort;
            builder.encodedAuthority(authority);
        }

        return builder.build();
    } else
        return uri;
}
/**
 * Tells whether a link is a hierarchical {@code https} link.
 *
 * @param uri link to test
 * @return {@code true} when the link is not opaque and its scheme is {@code https}
 */
static boolean isSecure(Uri uri) {
    if (uri.isOpaque()) {
        return false;
    }
    return "https".equals(uri.getScheme());
}
/**
 * Tells whether a link is a hierarchical {@code http} or {@code https} link.
 *
 * @param uri link to test
 * @return {@code true} when the link is not opaque and its scheme is
 *         {@code http} or {@code https}
 */
static boolean isHyperLink(Uri uri) {
    if (uri.isOpaque()) {
        return false;
    }
    String scheme = uri.getScheme();
    return "http".equals(scheme) || "https".equals(scheme);
}
}