2021-07-02 05:32:15 +00:00
|
|
|
package eu.faircode.email;
|
|
|
|
|
|
|
|
/*
|
|
|
|
This file is part of FairEmail.
|
|
|
|
|
|
|
|
FairEmail is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
FairEmail is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with FairEmail. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
2022-01-01 08:46:36 +00:00
|
|
|
Copyright 2018-2022 by Marcel Bokhorst (M66B)
|
2021-07-02 05:32:15 +00:00
|
|
|
*/
|
|
|
|
|
2021-07-20 20:16:20 +00:00
|
|
|
import android.content.Context;
|
2021-09-02 18:28:27 +00:00
|
|
|
import android.net.Uri;
|
2021-07-20 20:16:20 +00:00
|
|
|
import android.text.TextUtils;
|
2021-09-12 11:18:16 +00:00
|
|
|
import android.util.Base64;
|
2021-09-02 18:28:27 +00:00
|
|
|
import android.webkit.URLUtil;
|
2021-07-20 20:16:20 +00:00
|
|
|
|
2021-08-17 08:39:27 +00:00
|
|
|
import androidx.annotation.NonNull;
|
2021-09-09 06:23:55 +00:00
|
|
|
import androidx.core.util.PatternsCompat;
|
2021-08-17 08:39:27 +00:00
|
|
|
|
2021-07-20 20:16:20 +00:00
|
|
|
import java.io.BufferedReader;
|
|
|
|
import java.io.InputStream;
|
|
|
|
import java.io.InputStreamReader;
|
2021-09-12 11:18:16 +00:00
|
|
|
import java.net.URLDecoder;
|
|
|
|
import java.nio.charset.StandardCharsets;
|
|
|
|
import java.util.Arrays;
|
|
|
|
import java.util.Collections;
|
2021-07-20 20:16:20 +00:00
|
|
|
import java.util.HashSet;
|
2021-09-12 11:18:16 +00:00
|
|
|
import java.util.List;
|
2021-08-17 06:44:44 +00:00
|
|
|
import java.util.Locale;
|
2021-07-20 20:16:20 +00:00
|
|
|
|
2021-07-02 05:32:15 +00:00
|
|
|
public class UriHelper {
|
2021-07-20 20:16:20 +00:00
|
|
|
// https://publicsuffix.org/
|
|
|
|
private static final HashSet<String> suffixList = new HashSet<>();
|
|
|
|
|
2021-08-07 07:26:44 +00:00
|
|
|
// https://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.dat
|
|
|
|
private static final String SUFFIX_LIST_NAME = "public_suffix_list.dat";
|
2021-07-20 20:16:20 +00:00
|
|
|
|
2021-12-08 07:52:14 +00:00
|
|
|
// https://github.com/svenjacobs/leon
|
2021-09-12 11:18:16 +00:00
|
|
|
// https://github.com/newhouse/url-tracking-stripper
|
2021-12-08 07:52:14 +00:00
|
|
|
// https://maxchadwick.xyz/tracking-query-params-registry/
|
2021-09-12 11:18:16 +00:00
|
|
|
private static final List<String> PARANOID_QUERY = Collections.unmodifiableList(Arrays.asList(
|
|
|
|
// https://en.wikipedia.org/wiki/UTM_parameters
|
|
|
|
"awt_a", // AWeber
|
|
|
|
"awt_l", // AWeber
|
|
|
|
"awt_m", // AWeber
|
|
|
|
|
|
|
|
"icid", // Adobe
|
2021-12-08 07:52:14 +00:00
|
|
|
"ef_id", // https://experienceleague.adobe.com/docs/advertising-cloud/integrations/analytics/mc/mc-ids.html
|
|
|
|
"_ga", // Google Analytics
|
2021-09-12 11:18:16 +00:00
|
|
|
"gclid", // Google
|
|
|
|
"gclsrc", // Google ads
|
|
|
|
"dclid", // DoubleClick (Google)
|
|
|
|
"fbclid", // Facebook
|
|
|
|
"igshid", // Instagram
|
2021-12-08 07:52:14 +00:00
|
|
|
"msclkid", // https://help.ads.microsoft.com/apex/index/3/en/60000
|
2021-09-12 11:18:16 +00:00
|
|
|
|
|
|
|
"mc_cid", // MailChimp
|
|
|
|
"mc_eid", // MailChimp
|
|
|
|
|
|
|
|
"zanpid", // Zanox (Awin)
|
|
|
|
|
|
|
|
"kclickid" // https://support.freespee.com/hc/en-us/articles/202577831-Kenshoo-integration
|
|
|
|
));
|
|
|
|
|
|
|
|
// https://github.com/snarfed/granary/blob/master/granary/facebook.py#L1789
|
|
|
|
|
|
|
|
private static final List<String> FACEBOOK_WHITELIST_PATH = Collections.unmodifiableList(Arrays.asList(
|
|
|
|
"/nd/", "/n/", "/story.php"
|
|
|
|
));
|
|
|
|
|
|
|
|
private static final List<String> FACEBOOK_WHITELIST_QUERY = Collections.unmodifiableList(Arrays.asList(
|
|
|
|
"story_fbid", "fbid", "id", "comment_id"
|
|
|
|
));
|
|
|
|
|
2021-07-20 20:16:20 +00:00
|
|
|
static String getParentDomain(Context context, String host) {
|
2021-07-02 05:32:15 +00:00
|
|
|
if (host == null)
|
|
|
|
return null;
|
2021-08-17 06:44:44 +00:00
|
|
|
String parent = _getSuffix(context, host);
|
|
|
|
return (parent == null ? host : parent);
|
|
|
|
}
|
|
|
|
|
|
|
|
static boolean hasParentDomain(Context context, String host) {
|
|
|
|
return (host != null && _getSuffix(context, host) != null);
|
|
|
|
}
|
2021-07-02 05:32:15 +00:00
|
|
|
|
2021-08-17 08:39:27 +00:00
|
|
|
private static String _getSuffix(Context context, @NonNull String host) {
|
2021-07-20 20:16:20 +00:00
|
|
|
ensureSuffixList(context);
|
|
|
|
|
2021-08-17 06:44:44 +00:00
|
|
|
String h = host.toLowerCase(Locale.ROOT);
|
2021-07-20 20:16:20 +00:00
|
|
|
while (true) {
|
|
|
|
int dot = h.indexOf('.');
|
|
|
|
if (dot < 0)
|
2021-08-17 06:44:44 +00:00
|
|
|
return null;
|
|
|
|
|
2021-07-20 20:16:20 +00:00
|
|
|
String prefix = h.substring(0, dot);
|
|
|
|
h = h.substring(dot + 1);
|
2021-07-02 05:32:15 +00:00
|
|
|
|
2021-07-20 20:16:20 +00:00
|
|
|
int d = h.indexOf('.');
|
|
|
|
String w = (d < 0 ? null : '*' + h.substring(d));
|
|
|
|
|
|
|
|
synchronized (suffixList) {
|
2021-08-17 06:44:44 +00:00
|
|
|
if (!suffixList.contains('!' + h) &&
|
|
|
|
(suffixList.contains(h) || suffixList.contains(w))) {
|
2021-07-20 20:16:20 +00:00
|
|
|
String parent = prefix + "." + h;
|
2021-08-11 13:42:07 +00:00
|
|
|
Log.d("Host=" + host + " parent=" + parent);
|
2021-07-20 20:16:20 +00:00
|
|
|
return parent;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-07-02 05:32:15 +00:00
|
|
|
}
|
|
|
|
|
2021-07-02 07:51:56 +00:00
|
|
|
static String getEmailUser(String address) {
|
|
|
|
if (address == null)
|
|
|
|
return null;
|
|
|
|
|
|
|
|
int at = address.indexOf('@');
|
|
|
|
if (at > 0)
|
|
|
|
return address.substring(0, at);
|
|
|
|
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2021-07-02 05:32:15 +00:00
|
|
|
static String getEmailDomain(String address) {
|
|
|
|
if (address == null)
|
|
|
|
return null;
|
|
|
|
|
|
|
|
int at = address.indexOf('@');
|
|
|
|
if (at > 0)
|
|
|
|
return address.substring(at + 1);
|
|
|
|
|
2021-07-02 07:51:56 +00:00
|
|
|
return null;
|
2021-07-02 05:32:15 +00:00
|
|
|
}
|
2021-07-20 20:16:20 +00:00
|
|
|
|
2021-09-02 18:28:27 +00:00
|
|
|
static @NonNull
|
|
|
|
Uri guessScheme(@NonNull Uri uri) {
|
|
|
|
if (uri.getScheme() != null)
|
|
|
|
return uri;
|
|
|
|
|
|
|
|
String url = uri.toString();
|
|
|
|
if (Helper.EMAIL_ADDRESS.matcher(url).matches())
|
|
|
|
return Uri.parse("mailto:" + url);
|
2021-09-09 06:23:55 +00:00
|
|
|
else if (PatternsCompat.IP_ADDRESS.matcher(url).matches())
|
|
|
|
return Uri.parse("https://" + url);
|
2021-09-02 18:28:27 +00:00
|
|
|
else if (android.util.Patterns.PHONE.matcher(url).matches())
|
2021-09-09 06:23:55 +00:00
|
|
|
// Patterns.PHONE (\+[0-9]+[\- \.]*)?(\([0-9]+\)[\- \.]*)?([0-9][0-9\- \.]+[0-9])
|
|
|
|
// PhoneNumberUtils.isGlobalPhoneNumber() [\+]?[0-9.-]+
|
2021-09-02 18:28:27 +00:00
|
|
|
return Uri.parse("tel:" + url);
|
|
|
|
else {
|
|
|
|
Uri g = Uri.parse(URLUtil.guessUrl(url));
|
|
|
|
String scheme = g.getScheme();
|
|
|
|
if (scheme == null)
|
|
|
|
return uri;
|
|
|
|
else if ("http".equals(scheme))
|
|
|
|
scheme = "https";
|
|
|
|
return Uri.parse(scheme + "://" + url);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-28 18:07:17 +00:00
|
|
|
static int getSuffixCount(Context context) {
|
|
|
|
ensureSuffixList(context);
|
|
|
|
synchronized (suffixList) {
|
|
|
|
return suffixList.size();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private static void ensureSuffixList(Context context) {
|
2021-07-20 20:16:20 +00:00
|
|
|
synchronized (suffixList) {
|
|
|
|
if (suffixList.size() > 0)
|
|
|
|
return;
|
|
|
|
|
|
|
|
Log.i("Reading " + SUFFIX_LIST_NAME);
|
|
|
|
try (InputStream is = context.getAssets().open(SUFFIX_LIST_NAME)) {
|
|
|
|
BufferedReader br = new BufferedReader(new InputStreamReader((is)));
|
|
|
|
String line;
|
|
|
|
while ((line = br.readLine()) != null) {
|
|
|
|
line = line.trim();
|
|
|
|
if (TextUtils.isEmpty(line))
|
|
|
|
continue;
|
|
|
|
if (line.startsWith("//"))
|
|
|
|
continue;
|
|
|
|
suffixList.add(line);
|
|
|
|
}
|
|
|
|
Log.i(SUFFIX_LIST_NAME + "=" + suffixList.size());
|
|
|
|
} catch (Throwable ex) {
|
|
|
|
Log.e(ex);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-09-12 11:18:16 +00:00
|
|
|
|
|
|
|
static Uri sanitize(Uri uri) {
|
2021-09-12 11:37:10 +00:00
|
|
|
if (uri.isOpaque())
|
|
|
|
return uri;
|
2021-09-12 11:18:16 +00:00
|
|
|
|
|
|
|
Uri url;
|
2021-09-12 11:37:10 +00:00
|
|
|
boolean changed = false;
|
2021-09-12 11:18:16 +00:00
|
|
|
if (uri.getHost() != null &&
|
|
|
|
uri.getHost().endsWith("safelinks.protection.outlook.com") &&
|
|
|
|
!TextUtils.isEmpty(uri.getQueryParameter("url"))) {
|
|
|
|
changed = true;
|
|
|
|
url = Uri.parse(uri.getQueryParameter("url"));
|
|
|
|
} else if ("https".equals(uri.getScheme()) &&
|
|
|
|
"smex-ctp.trendmicro.com".equals(uri.getHost()) &&
|
|
|
|
"/wis/clicktime/v1/query".equals(uri.getPath()) &&
|
|
|
|
!TextUtils.isEmpty(uri.getQueryParameter("url"))) {
|
|
|
|
changed = true;
|
|
|
|
url = Uri.parse(uri.getQueryParameter("url"));
|
|
|
|
} else if ("https".equals(uri.getScheme()) &&
|
|
|
|
"www.google.com".equals(uri.getHost()) &&
|
|
|
|
uri.getPath() != null &&
|
|
|
|
uri.getPath().startsWith("/amp/")) {
|
|
|
|
// https://blog.amp.dev/2017/02/06/whats-in-an-amp-url/
|
|
|
|
Uri result = null;
|
|
|
|
|
|
|
|
String u = uri.toString();
|
|
|
|
u = u.replace("https://www.google.com/amp/", "");
|
|
|
|
|
|
|
|
int p = u.indexOf("/");
|
|
|
|
while (p > 0) {
|
|
|
|
String segment = u.substring(0, p);
|
|
|
|
if (segment.contains(".")) {
|
|
|
|
result = Uri.parse("https://" + u);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
u = u.substring(p + 1);
|
|
|
|
p = u.indexOf("/");
|
|
|
|
}
|
|
|
|
|
2022-01-04 18:03:21 +00:00
|
|
|
changed = (result != null);
|
|
|
|
url = (result == null ? uri : result);
|
|
|
|
} else if ("https".equals(uri.getScheme()) &&
|
|
|
|
uri.getHost() != null &&
|
|
|
|
uri.getHost().startsWith("www.google.") &&
|
|
|
|
uri.getQueryParameter("url") != null) {
|
|
|
|
// Google non-com redirects
|
|
|
|
Uri result = Uri.parse(uri.getQueryParameter("url"));
|
2021-09-12 11:18:16 +00:00
|
|
|
changed = (result != null);
|
|
|
|
url = (result == null ? uri : result);
|
|
|
|
} else if (uri.getQueryParameterNames().size() == 1) {
|
|
|
|
// Sophos Email Appliance
|
|
|
|
Uri result = null;
|
2021-09-12 12:39:21 +00:00
|
|
|
|
2021-09-12 11:18:16 +00:00
|
|
|
String key = uri.getQueryParameterNames().iterator().next();
|
|
|
|
if (TextUtils.isEmpty(uri.getQueryParameter(key)))
|
|
|
|
try {
|
|
|
|
String data = new String(Base64.decode(key, Base64.DEFAULT));
|
2021-09-12 12:39:21 +00:00
|
|
|
int v = data.indexOf("ver=");
|
2021-09-12 11:18:16 +00:00
|
|
|
int u = data.indexOf("&&url=");
|
2021-09-12 12:39:21 +00:00
|
|
|
if (v == 0 && u > 0)
|
2021-09-12 11:18:16 +00:00
|
|
|
result = Uri.parse(URLDecoder.decode(data.substring(u + 6), StandardCharsets.UTF_8.name()));
|
|
|
|
} catch (Throwable ex) {
|
|
|
|
Log.w(ex);
|
|
|
|
}
|
|
|
|
|
|
|
|
changed = (result != null);
|
|
|
|
url = (result == null ? uri : result);
|
|
|
|
} else
|
|
|
|
url = uri;
|
|
|
|
|
|
|
|
if (url.isOpaque())
|
|
|
|
return uri;
|
|
|
|
|
2021-09-12 11:37:10 +00:00
|
|
|
Uri.Builder builder = url.buildUpon();
|
2021-09-12 11:18:16 +00:00
|
|
|
|
|
|
|
builder.clearQuery();
|
|
|
|
String host = uri.getHost();
|
|
|
|
String path = uri.getPath();
|
|
|
|
if (host != null)
|
|
|
|
host = host.toLowerCase(Locale.ROOT);
|
|
|
|
if (path != null)
|
|
|
|
path = path.toLowerCase(Locale.ROOT);
|
|
|
|
boolean first = "www.facebook.com".equals(host);
|
|
|
|
for (String key : url.getQueryParameterNames()) {
|
|
|
|
// https://en.wikipedia.org/wiki/UTM_parameters
|
|
|
|
// https://docs.oracle.com/en/cloud/saas/marketing/eloqua-user/Help/EloquaAsynchronousTrackingScripts/EloquaTrackingParameters.htm
|
|
|
|
String lkey = key.toLowerCase(Locale.ROOT);
|
|
|
|
if (PARANOID_QUERY.contains(lkey) ||
|
|
|
|
lkey.startsWith("utm_") ||
|
|
|
|
lkey.startsWith("elq") ||
|
|
|
|
((host != null && host.endsWith("facebook.com")) &&
|
|
|
|
!first &&
|
|
|
|
FACEBOOK_WHITELIST_PATH.contains(path) &&
|
|
|
|
!FACEBOOK_WHITELIST_QUERY.contains(lkey)) ||
|
|
|
|
("store.steampowered.com".equals(host) &&
|
|
|
|
"snr".equals(lkey)))
|
|
|
|
changed = true;
|
|
|
|
else if (!TextUtils.isEmpty(key))
|
|
|
|
for (String value : url.getQueryParameters(key)) {
|
|
|
|
Log.i("Query " + key + "=" + value);
|
|
|
|
Uri suri = Uri.parse(value);
|
|
|
|
if ("http".equals(suri.getScheme()) || "https".equals(suri.getScheme())) {
|
|
|
|
Uri s = sanitize(suri);
|
|
|
|
if (s != null) {
|
|
|
|
changed = true;
|
|
|
|
value = s.toString();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
builder.appendQueryParameter(key, value);
|
|
|
|
}
|
|
|
|
first = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (changed ? builder.build() : null);
|
|
|
|
}
|
|
|
|
|
|
|
|
static Uri secure(Uri uri, boolean https) {
|
|
|
|
String scheme = uri.getScheme();
|
|
|
|
if (https ? "http".equals(scheme) : "https".equals(scheme)) {
|
|
|
|
Uri.Builder builder = uri.buildUpon();
|
|
|
|
builder.scheme(https ? "https" : "http");
|
|
|
|
|
|
|
|
String authority = uri.getEncodedAuthority();
|
|
|
|
if (authority != null) {
|
|
|
|
authority = authority.replace(https ? ":80" : ":443", https ? ":443" : ":80");
|
|
|
|
builder.encodedAuthority(authority);
|
|
|
|
}
|
|
|
|
|
|
|
|
return builder.build();
|
|
|
|
} else
|
|
|
|
return uri;
|
|
|
|
}
|
|
|
|
|
|
|
|
static boolean isSecure(Uri uri) {
|
|
|
|
return (!uri.isOpaque() && "https".equals(uri.getScheme()));
|
|
|
|
}
|
|
|
|
|
|
|
|
static boolean isHyperLink(Uri uri) {
|
|
|
|
return (!uri.isOpaque() &&
|
|
|
|
("http".equals(uri.getScheme()) || "https".equals(uri.getScheme())));
|
|
|
|
}
|
2021-07-02 05:32:15 +00:00
|
|
|
}
|