core: fix cookie parsing (part 2) (#8150)

* core: fix cookie parsing (part 2)

After fixing cookie storage in #8133 I noticed that I still have a lot of '.json.bak' files in the Jackett configuration folder. After deleting them they were created again in each request. The cause was we were parsing bad the cookies with '=' character in the value. Most Cloudflare cookies include if so we were sending bad cookies and solving the callenge in each request.

This PR should increase performance in several ways: we are not solving the challenge again (it takes time), we are not making extra requests and we are not updating the Jackett configuration in each request (both files '.json' and '.json.bak').

Tested with the client HttpWebClient2NetCore only. Please do some tests with the site 1337x.
This commit is contained in:
Diego Heras 2020-04-13 06:22:50 +02:00 committed by GitHub
parent 10737431aa
commit 9cbc78b1c2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 183 additions and 80 deletions

View File

@ -2,7 +2,6 @@ using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using AutoMapper;
using Jackett.Common.Models;
@ -350,9 +349,6 @@ namespace Jackett.Common.Indexers
public abstract class BaseWebIndexer : BaseIndexer, IWebIndexer
{
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie
private readonly Regex _cookieRegex = new Regex(@"([^\(\)<>@,;:\\""/\[\]\?=\{\}\s]+)=([^,;\\""\s]+)");
protected BaseWebIndexer(string name, string link, string description, IIndexerConfigurationService configService, WebClient client, Logger logger, ConfigurationData configData, IProtectionService p, TorznabCapabilities caps = null, string downloadBase = null)
: base(name, link, description, configService, logger, configData, p)
{
@ -610,17 +606,13 @@ namespace Jackett.Common.Indexers
private string ResolveCookies(string incomingCookies = "")
{
var redirRequestCookies = string.IsNullOrWhiteSpace(CookieHeader) ? incomingCookies : CookieHeader + " " + incomingCookies;
var cookieDictionary = new Dictionary<string, string>();
var matches = _cookieRegex.Match(redirRequestCookies);
while (matches.Success)
{
if (matches.Groups.Count > 2)
cookieDictionary[matches.Groups[1].Value] = matches.Groups[2].Value;
matches = matches.NextMatch();
}
return string.Join("; ", cookieDictionary
.Where(kv => kv.Key != "cf_use_ob" && kv.Key != "cf_ob_info") // These cookies are causing BadGateway errors, so we drop them, see issue #2306
.Select(kv => kv.Key.ToString() + "=" + kv.Value.ToString()).ToArray());
var cookieDictionary = CookieUtil.CookieHeaderToDictionary(redirRequestCookies);
// These cookies are causing BadGateway errors, so we drop them, see issue #2306
cookieDictionary.Remove("cf_use_ob");
cookieDictionary.Remove("cf_ob_info");
return CookieUtil.CookieDictionaryToHeader(cookieDictionary);
}
// Update CookieHeader with new cookies and save the config if something changed (e.g. a new CloudFlare clearance cookie was issued)

View File

@ -133,21 +133,14 @@ namespace Jackett.Common.Utils.Clients
ServicePointManager.SecurityProtocol = (SecurityProtocolType)192 | (SecurityProtocolType)768 | (SecurityProtocolType)3072;
var cookies = new CookieContainer();
if (!string.IsNullOrEmpty(webRequest.Cookies))
if (!string.IsNullOrWhiteSpace(webRequest.Cookies))
{
var uri = new Uri(webRequest.Url);
var cookieUrl = new Uri(uri.Scheme + "://" + uri.Host); // don't include the path, Scheme is needed for mono compatibility
foreach (var c in webRequest.Cookies.Split(';'))
{
try
{
cookies.SetCookies(cookieUrl, c.Trim());
}
catch (CookieException ex)
{
logger.Info("(Non-critical) Problem loading cookie {0}, {1}, {2}", uri, c, ex.Message);
}
}
// don't include the path, Scheme is needed for mono compatibility
var requestUri = new Uri(webRequest.Url);
var cookieUrl = new Uri(requestUri.Scheme + "://" + requestUri.Host);
var cookieDictionary = CookieUtil.CookieHeaderToDictionary(webRequest.Cookies);
foreach (var kv in cookieDictionary)
cookies.Add(cookieUrl, new Cookie(kv.Key, kv.Value));
}
var userAgent = webRequest.EmulateBrowser.Value ? BrowserUtil.ChromeUserAgent : "Jackett/" + configService.GetVersion();
@ -232,7 +225,7 @@ namespace Jackett.Common.Utils.Clients
}
// some cloudflare clients are using a refresh header
// Pull it out manually
// Pull it out manually
if (response.StatusCode == HttpStatusCode.ServiceUnavailable && response.Headers.Contains("Refresh"))
{
var refreshHeaders = response.Headers.GetValues("Refresh");

View File

@ -179,26 +179,16 @@ namespace Jackett.Common.Utils.Clients
// clear cookies from cookiecontainer
var oldCookies = cookies.GetCookies(request.RequestUri);
foreach (Cookie oldCookie in oldCookies)
{
oldCookie.Expired = true;
}
if (!string.IsNullOrEmpty(webRequest.Cookies))
// add cookies to cookiecontainer
if (!string.IsNullOrWhiteSpace(webRequest.Cookies))
{
// add cookies to cookiecontainer
var cookieUrl = new Uri(request.RequestUri.Scheme + "://" + request.RequestUri.Host); // don't include the path, Scheme is needed for mono compatibility
foreach (var ccookiestr in webRequest.Cookies.Split(';'))
{
var cookiestrparts = ccookiestr.Split('=');
var name = cookiestrparts[0].Trim();
if (string.IsNullOrWhiteSpace(name))
continue;
var value = "";
if (cookiestrparts.Length >= 2)
value = cookiestrparts[1].Trim();
var cookie = new Cookie(name, value);
cookies.Add(cookieUrl, cookie);
}
// don't include the path, Scheme is needed for mono compatibility
var cookieUrl = new Uri(request.RequestUri.Scheme + "://" + request.RequestUri.Host);
var cookieDictionary = CookieUtil.CookieHeaderToDictionary(webRequest.Cookies);
foreach (var kv in cookieDictionary)
cookies.Add(cookieUrl, new Cookie(kv.Key, kv.Value));
}
if (webRequest.Headers != null)
@ -254,7 +244,7 @@ namespace Jackett.Common.Utils.Clients
}
// some cloudflare clients are using a refresh header
// Pull it out manually
// Pull it out manually
if (response.StatusCode == System.Net.HttpStatusCode.ServiceUnavailable && response.Headers.Contains("Refresh"))
{
var refreshHeaders = response.Headers.GetValues("Refresh");

View File

@ -175,26 +175,16 @@ namespace Jackett.Common.Utils.Clients
// clear cookies from cookiecontainer
var oldCookies = cookies.GetCookies(request.RequestUri);
foreach (Cookie oldCookie in oldCookies)
{
oldCookie.Expired = true;
}
if (!string.IsNullOrEmpty(webRequest.Cookies))
// add cookies to cookiecontainer
if (!string.IsNullOrWhiteSpace(webRequest.Cookies))
{
// add cookies to cookiecontainer
var cookieUrl = new Uri(request.RequestUri.Scheme + "://" + request.RequestUri.Host); // don't include the path, Scheme is needed for mono compatibility
foreach (var ccookiestr in webRequest.Cookies.Split(';'))
{
var cookiestrparts = ccookiestr.Split('=');
var name = cookiestrparts[0].Trim();
if (string.IsNullOrWhiteSpace(name))
continue;
var value = "";
if (cookiestrparts.Length >= 2)
value = cookiestrparts[1].Trim();
var cookie = new Cookie(name, value);
cookies.Add(cookieUrl, cookie);
}
// don't include the path, Scheme is needed for mono compatibility
var cookieUrl = new Uri(request.RequestUri.Scheme + "://" + request.RequestUri.Host);
var cookieDictionary = CookieUtil.CookieHeaderToDictionary(webRequest.Cookies);
foreach (var kv in cookieDictionary)
cookies.Add(cookieUrl, new Cookie(kv.Key, kv.Value));
}
if (webRequest.Headers != null)

View File

@ -129,21 +129,14 @@ namespace Jackett.Common.Utils.Clients
ServicePointManager.SecurityProtocol = (SecurityProtocolType)192 | (SecurityProtocolType)768 | (SecurityProtocolType)3072;
var cookies = new CookieContainer();
if (!string.IsNullOrEmpty(webRequest.Cookies))
if (!string.IsNullOrWhiteSpace(webRequest.Cookies))
{
var uri = new Uri(webRequest.Url);
var cookieUrl = new Uri(uri.Scheme + "://" + uri.Host); // don't include the path, Scheme is needed for mono compatibility
foreach (var c in webRequest.Cookies.Split(';'))
{
try
{
cookies.SetCookies(cookieUrl, c.Trim());
}
catch (CookieException ex)
{
logger.Info("(Non-critical) Problem loading cookie {0}, {1}, {2}", uri, c, ex.Message);
}
}
// don't include the path, Scheme is needed for mono compatibility
var requestUri = new Uri(webRequest.Url);
var cookieUrl = new Uri(requestUri.Scheme + "://" + requestUri.Host);
var cookieDictionary = CookieUtil.CookieHeaderToDictionary(webRequest.Cookies);
foreach (var kv in cookieDictionary)
cookies.Add(cookieUrl, new Cookie(kv.Key, kv.Value));
}
var userAgent = webRequest.EmulateBrowser.Value ? BrowserUtil.ChromeUserAgent : "Jackett/" + configService.GetVersion();
@ -231,7 +224,7 @@ namespace Jackett.Common.Utils.Clients
}
// some cloudflare clients are using a refresh header
// Pull it out manually
// Pull it out manually
if (response.StatusCode == HttpStatusCode.ServiceUnavailable && response.Headers.Contains("Refresh"))
{
var refreshHeaders = response.Headers.GetValues("Refresh");

View File

@ -0,0 +1,41 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
namespace Jackett.Common.Utils
{
public static class CookieUtil
{
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie
// NOTE: we are not checking non-ascii characters and we should
private static readonly Regex _CookieRegex = new Regex(@"([^\(\)<>@,;:\\""/\[\]\?=\{\}\s]+)=([^,;\\""\s]+)");
private static readonly char[] InvalidKeyChars = {'(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?', '=', '{', '}', ' ', '\t', '\n'};
private static readonly char[] InvalidValueChars = {'"', ',', ';', '\\', ' ', '\t', '\n'};
public static Dictionary<string, string> CookieHeaderToDictionary(string cookieHeader)
{
var cookieDictionary = new Dictionary<string, string>();
if (cookieHeader == null)
return cookieDictionary;
var matches = _CookieRegex.Match(cookieHeader);
while (matches.Success)
{
if (matches.Groups.Count > 2)
cookieDictionary[matches.Groups[1].Value] = matches.Groups[2].Value;
matches = matches.NextMatch();
}
return cookieDictionary;
}
public static string CookieDictionaryToHeader(Dictionary<string, string> cookieDictionary)
{
if (cookieDictionary == null)
return "";
foreach (var kv in cookieDictionary)
if (kv.Key.IndexOfAny(InvalidKeyChars) > -1 || kv.Value.IndexOfAny(InvalidValueChars) > -1)
throw new FormatException($"The cookie '{kv.Key}={kv.Value}' is malformed.");
return string.Join("; ", cookieDictionary.Select(kv => kv.Key + "=" + kv.Value));
}
}
}

View File

@ -0,0 +1,104 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using Jackett.Common.Utils;
using NUnit.Framework;
using Assert = NUnit.Framework.Assert;
using CollectionAssert = NUnit.Framework.CollectionAssert;
namespace Jackett.Test.Utils
{
[TestFixture]
public class CookieUtilTests
{
[Test]
public void CookieHeaderToDictionaryGood()
{
// valid cookies with non-alpha characters in the value
var cookieHeader = "__cfduid=d6237f041586694295; __cf_bm=TlOng/xyqckk-TMen38z+0RFYA7YA=";
var expectedCookieDictionary = new Dictionary<string, string>
{
{"__cfduid", "d6237f041586694295"}, {"__cf_bm", "TlOng/xyqckk-TMen38z+0RFYA7YA="}
};
CollectionAssert.AreEqual(expectedCookieDictionary, CookieUtil.CookieHeaderToDictionary(cookieHeader));
}
[Test]
public void CookieHeaderToDictionaryDuplicateKeys()
{
// cookie with duplicate keys and whitespace separator instead of ;
// this cookie is not valid according to the standard, but it occurs in Jackett because we are concatenating
// cookies in many parts of the code (and we are not doing it well). this is safe because the whitespace
// can't be part of the key nor the value.
var cookieHeader = "__cfduid=d6237f041586694295; __cf_bm=TlOng/xyqckk-TMen38z+0RFYA7YA= __cf_bm=test";
var expectedCookieDictionary = new Dictionary<string, string>
{
{"__cfduid", "d6237f041586694295"},
{"__cf_bm", "test"} // we always assume the latest value is the most recent
};
CollectionAssert.AreEqual(expectedCookieDictionary, CookieUtil.CookieHeaderToDictionary(cookieHeader));
}
[Test]
public void CookieHeaderToDictionaryMalformed()
{
// malformed cookies
var cookieHeader = "__cfduidd6237f041586694295; __cf_;bm TlOng; good_cookie=value";
var expectedCookieDictionary = new Dictionary<string, string> {{"good_cookie", "value"},};
CollectionAssert.AreEqual(expectedCookieDictionary, CookieUtil.CookieHeaderToDictionary(cookieHeader));
}
[Test]
[SuppressMessage("ReSharper", "CollectionNeverUpdated.Local")]
public void CookieHeaderToDictionaryNull()
{
// null cookie header
var expectedCookieDictionary = new Dictionary<string, string>();
CollectionAssert.AreEqual(expectedCookieDictionary, CookieUtil.CookieHeaderToDictionary(null));
}
[Test]
public void CookieDictionaryToHeaderGood()
{
// valid cookies with non-alpha characters in the value
var cookieDictionary = new Dictionary<string, string>
{
{"__cfduid", "d6237f041586694295"}, {"__cf_bm", "TlOng/xyqckk-TMen38z+0RFYA7YA="}
};
var expectedCookieHeader = "__cfduid=d6237f041586694295; __cf_bm=TlOng/xyqckk-TMen38z+0RFYA7YA=";
CollectionAssert.AreEqual(expectedCookieHeader, CookieUtil.CookieDictionaryToHeader(cookieDictionary));
}
[Test]
public void CookieDictionaryToHeaderMalformed1()
{
// malformed key
var cookieDictionary = new Dictionary<string, string>
{
{"__cf_=bm", "34234234"}
};
var ex = Assert.Throws<FormatException>(() => CookieUtil.CookieDictionaryToHeader(cookieDictionary));
Assert.AreEqual( "The cookie '__cf_=bm=34234234' is malformed.", ex.Message);
}
[Test]
public void CookieDictionaryToHeaderMalformed2()
{
// malformed value
var cookieDictionary = new Dictionary<string, string>
{
{"__cf_bm", "34234 234"}
};
var ex = Assert.Throws<FormatException>(() => CookieUtil.CookieDictionaryToHeader(cookieDictionary));
Assert.AreEqual( "The cookie '__cf_bm=34234 234' is malformed.", ex.Message);
}
[Test]
public void CookieDictionaryToHeaderNull()
{
// null cookie dictionary
var expectedCookieHeader = "";
CollectionAssert.AreEqual(expectedCookieHeader, CookieUtil.CookieDictionaryToHeader(null));
}
}
}