core: refactor http webclient part 3 #8529 (#7659)

Move encoding for results into BaseWebResult to prepare for class merging
This commit is contained in:
Cory 2020-03-14 17:52:06 -05:00 committed by ngosang
parent 4531a876eb
commit abb8526b76
6 changed files with 143 additions and 33 deletions

View File

@ -1,10 +1,12 @@
using System.Collections.Generic;
using System.Net;
using System.Text;
namespace Jackett.Common.Utils.Clients
{
public abstract class BaseWebResult
{
public Encoding Encoding { get; set; }
public HttpStatusCode Status { get; set; }
public string Cookies { get; set; }
public string RedirectingTo { get; set; }

View File

@ -7,6 +7,7 @@ using System.Net.Http;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using CloudflareSolverRe;
using com.LandonKey.SocksWebProxy;
@ -297,6 +298,40 @@ namespace Jackett.Common.Utils.Clients
result.Cookies = cookieBuilder.ToString().Trim();
}
ServerUtil.ResureRedirectIsFullyQualified(webRequest, result);
Encoding encoding = null;
if (webRequest.Encoding != null)
{
encoding = webRequest.Encoding;
}
else if (result.Headers.ContainsKey("content-type"))
{
var CharsetRegex = new Regex(@"charset=([\w-]+)", RegexOptions.Compiled);
var CharsetRegexMatch = CharsetRegex.Match(result.Headers["content-type"][0]);
if (CharsetRegexMatch.Success)
{
var charset = CharsetRegexMatch.Groups[1].Value;
try
{
encoding = Encoding.GetEncoding(charset);
}
catch (Exception ex)
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): Error loading encoding {2} based on header {3}: {4}", ClientType, webRequest.Url, charset, result.Headers["content-type"][0], ex));
}
}
else
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): Got header without charset: {2}", ClientType, webRequest.Url, result.Headers["content-type"][0]));
}
}
if (encoding == null)
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): No encoding detected, defaulting to UTF-8", ClientType, webRequest.Url));
encoding = Encoding.UTF8;
}
result.Encoding = encoding;
return result;
}
}

View File

@ -7,6 +7,7 @@ using System.Net.Http;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using CloudflareSolverRe;
using com.LandonKey.SocksWebProxy;
@ -317,6 +318,40 @@ namespace Jackett.Common.Utils.Clients
result.Cookies = cookieBuilder.ToString().Trim();
}
ServerUtil.ResureRedirectIsFullyQualified(webRequest, result);
Encoding encoding = null;
if (webRequest.Encoding != null)
{
encoding = webRequest.Encoding;
}
else if (result.Headers.ContainsKey("content-type"))
{
var CharsetRegex = new Regex(@"charset=([\w-]+)", RegexOptions.Compiled);
var CharsetRegexMatch = CharsetRegex.Match(result.Headers["content-type"][0]);
if (CharsetRegexMatch.Success)
{
var charset = CharsetRegexMatch.Groups[1].Value;
try
{
encoding = Encoding.GetEncoding(charset);
}
catch (Exception ex)
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): Error loading encoding {2} based on header {3}: {4}", ClientType, webRequest.Url, charset, result.Headers["content-type"][0], ex));
}
}
else
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): Got header without charset: {2}", ClientType, webRequest.Url, result.Headers["content-type"][0]));
}
}
if (encoding == null)
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): No encoding detected, defaulting to UTF-8", ClientType, webRequest.Url));
encoding = Encoding.UTF8;
}
result.Encoding = encoding;
return result;
}

View File

@ -7,6 +7,7 @@ using System.Net.Http;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using CloudflareSolverRe;
using com.LandonKey.SocksWebProxy;
@ -313,6 +314,40 @@ namespace Jackett.Common.Utils.Clients
result.Cookies = cookieBuilder.ToString().Trim();
}
ServerUtil.ResureRedirectIsFullyQualified(webRequest, result);
Encoding encoding = null;
if (webRequest.Encoding != null)
{
encoding = webRequest.Encoding;
}
else if (result.Headers.ContainsKey("content-type"))
{
var CharsetRegex = new Regex(@"charset=([\w-]+)", RegexOptions.Compiled);
var CharsetRegexMatch = CharsetRegex.Match(result.Headers["content-type"][0]);
if (CharsetRegexMatch.Success)
{
var charset = CharsetRegexMatch.Groups[1].Value;
try
{
encoding = Encoding.GetEncoding(charset);
}
catch (Exception ex)
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): Error loading encoding {2} based on header {3}: {4}", ClientType, webRequest.Url, charset, result.Headers["content-type"][0], ex));
}
}
else
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): Got header without charset: {2}", ClientType, webRequest.Url, result.Headers["content-type"][0]));
}
}
if (encoding == null)
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): No encoding detected, defaulting to UTF-8", ClientType, webRequest.Url));
encoding = Encoding.UTF8;
}
result.Encoding = encoding;
return result;
}

View File

@ -7,6 +7,7 @@ using System.Net.Http;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using CloudflareSolverRe;
using com.LandonKey.SocksWebProxy;
@ -296,6 +297,40 @@ namespace Jackett.Common.Utils.Clients
result.Cookies = cookieBuilder.ToString().Trim();
}
ServerUtil.ResureRedirectIsFullyQualified(webRequest, result);
Encoding encoding = null;
if (webRequest.Encoding != null)
{
encoding = webRequest.Encoding;
}
else if (result.Headers.ContainsKey("content-type"))
{
var CharsetRegex = new Regex(@"charset=([\w-]+)", RegexOptions.Compiled);
var CharsetRegexMatch = CharsetRegex.Match(result.Headers["content-type"][0]);
if (CharsetRegexMatch.Success)
{
var charset = CharsetRegexMatch.Groups[1].Value;
try
{
encoding = Encoding.GetEncoding(charset);
}
catch (Exception ex)
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): Error loading encoding {2} based on header {3}: {4}", ClientType, webRequest.Url, charset, result.Headers["content-type"][0], ex));
}
}
else
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): Got header without charset: {2}", ClientType, webRequest.Url, result.Headers["content-type"][0]));
}
}
if (encoding == null)
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): No encoding detected, defaulting to UTF-8", ClientType, webRequest.Url));
encoding = Encoding.UTF8;
}
result.Encoding = encoding;
return result;
}
}

View File

@ -115,42 +115,10 @@ namespace Jackett.Common.Utils.Clients
lastRequest = DateTime.Now;
result.Request = request;
var stringResult = Mapper.Map<WebClientStringResult>(result);
Encoding encoding = null;
if (request.Encoding != null)
{
encoding = request.Encoding;
}
else if (result.Headers.ContainsKey("content-type"))
{
var CharsetRegex = new Regex(@"charset=([\w-]+)", RegexOptions.Compiled);
var CharsetRegexMatch = CharsetRegex.Match(result.Headers["content-type"][0]);
if (CharsetRegexMatch.Success)
{
var charset = CharsetRegexMatch.Groups[1].Value;
try
{
encoding = Encoding.GetEncoding(charset);
}
catch (Exception ex)
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): Error loading encoding {2} based on header {3}: {4}", ClientType, request.Url, charset, result.Headers["content-type"][0], ex));
}
}
else
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): Got header without charset: {2}", ClientType, request.Url, result.Headers["content-type"][0]));
}
}
if (encoding == null)
{
logger.Error(string.Format("WebClient({0}).GetString(Url:{1}): No encoding detected, defaulting to UTF-8", ClientType, request.Url));
encoding = Encoding.UTF8;
}
string decodedContent = null;
if (result.Content != null)
decodedContent = encoding.GetString(result.Content);
decodedContent = result.Encoding.GetString(result.Content);
stringResult.ContentString = decodedContent;
logger.Debug(string.Format("WebClient({0}): Returning {1} => {2}", ClientType, result.Status, (result.IsRedirect ? result.RedirectingTo + " " : "") + (decodedContent == null ? "<NULL>" : decodedContent)));