Convert Nordicbits to AngleSharp (#7401)

This commit is contained in:
Diego Heras 2020-03-01 07:45:17 +01:00 committed by GitHub
parent db669b31a1
commit 35f9e0fbe0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 67 additions and 55 deletions

View File

@ -7,7 +7,9 @@ using System.Reflection;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using CsQuery;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using AngleSharp.Html.Parser;
using Jackett.Common.Helpers;
using Jackett.Common.Models;
using Jackett.Common.Models.IndexerConfig.Bespoke;
@ -37,7 +39,7 @@ namespace Jackett.Common.Indexers
private static string Directory => Path.Combine(Path.GetTempPath(), "Jackett", MethodBase.GetCurrentMethod().DeclaringType?.Name);
private readonly Dictionary<string, string> _emulatedBrowserHeaders = new Dictionary<string, string>();
private CQ _fDom;
private ConfigurationDataNordicbits ConfigData => (ConfigurationDataNordicbits)configData;
public Nordicbits(IIndexerConfigurationService configService, Utils.Clients.WebClient w, Logger l, IProtectionService ps)
@ -68,6 +70,7 @@ namespace Jackett.Common.Indexers
// Books
AddCategoryMapping("cat=54", TorznabCatType.AudioAudiobook, "Books - Audiobooks");
AddCategoryMapping("cat=9", TorznabCatType.BooksEbook, "Books - E-Books");
AddCategoryMapping("cat=84", TorznabCatType.BooksEbook, "Books - Education");
// Games
AddCategoryMapping("cat=24", TorznabCatType.PCGames, "Games - PC");
@ -91,6 +94,7 @@ namespace Jackett.Common.Indexers
AddCategoryMapping("cat=10", TorznabCatType.MoviesSD, "Movies - SD");
AddCategoryMapping("cat=23", TorznabCatType.MoviesSD, "Movies - MP4 Tablet");
AddCategoryMapping("cat=65", TorznabCatType.XXX, "Movies - Porn");
AddCategoryMapping("cat=90", TorznabCatType.MoviesHD, "Movies - No Nordic Subs");
// Music
AddCategoryMapping("cat=28", TorznabCatType.AudioLossless, "Music - FLAC");
@ -104,6 +108,27 @@ namespace Jackett.Common.Indexers
AddCategoryMapping("cat=48", TorznabCatType.TVUHD, "TV - HD-4K/2160p");
AddCategoryMapping("cat=57", TorznabCatType.TVUHD, "TV - HD-4K/2160p Boxset");
AddCategoryMapping("cat=11", TorznabCatType.TVSD, "TV - Boxset");
AddCategoryMapping("cat=80", TorznabCatType.TVSD, "TV - Danish Boxset");
AddCategoryMapping("cat=71", TorznabCatType.TVHD, "TV - Danish HD-1080p");
AddCategoryMapping("cat=72", TorznabCatType.TVHD, "TV - Danish HD-720p");
AddCategoryMapping("cat=73", TorznabCatType.TVSD, "TV - Danish SD");
AddCategoryMapping("cat=83", TorznabCatType.TVSD, "TV - Finnish Boxset");
AddCategoryMapping("cat=77", TorznabCatType.TVHD, "TV - Finnish HD-1080p");
AddCategoryMapping("cat=78", TorznabCatType.TVHD, "TV - Finnish HD-720p");
AddCategoryMapping("cat=79", TorznabCatType.TVSD, "TV - Finnish SD");
AddCategoryMapping("cat=89", TorznabCatType.TVSD, "TV - Nordic Boxset");
AddCategoryMapping("cat=86", TorznabCatType.TVHD, "TV - Nordic HD-1080p");
AddCategoryMapping("cat=87", TorznabCatType.TVHD, "TV - Nordic HD-720p");
AddCategoryMapping("cat=88", TorznabCatType.TVSD, "TV - Nordic SD");
AddCategoryMapping("cat=82", TorznabCatType.TVSD, "TV - Norwegian Boxset");
AddCategoryMapping("cat=74", TorznabCatType.TVHD, "TV - Norwegian HD-1080p");
AddCategoryMapping("cat=75", TorznabCatType.TVHD, "TV - Norwegian HD-720p");
AddCategoryMapping("cat=76", TorznabCatType.TVSD, "TV - Norwegian SD");
AddCategoryMapping("cat=81", TorznabCatType.TVSD, "TV - Swedish Boxset");
AddCategoryMapping("cat=68", TorznabCatType.TVHD, "TV - Swedish HD-1080p");
AddCategoryMapping("cat=69", TorznabCatType.TVHD, "TV - Swedish HD-720p");
AddCategoryMapping("cat=70", TorznabCatType.TVSD, "TV - Swedish SD");
AddCategoryMapping("cat=91", TorznabCatType.TVHD, "TV - No Nordic Subs");
AddCategoryMapping("cat=7", TorznabCatType.TVHD, "TV - HD-1080p");
AddCategoryMapping("cat=31", TorznabCatType.TVHD, "TV - HD-1080p Boxset");
AddCategoryMapping("cat=30", TorznabCatType.TVHD, "TV - HD-720p");
@ -257,7 +282,6 @@ namespace Jackett.Common.Indexers
protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
{
var releases = new List<ReleaseInfo>();
var torrentRowList = new List<CQ>();
var exactSearchTerm = query.GetQueryString();
var searchUrl = SearchUrl;
@ -293,38 +317,29 @@ namespace Jackett.Common.Indexers
// Getting results & Store content
var response = await RequestStringWithCookiesAndRetry(request, ConfigData.CookieHeader.Value);
_fDom = response.Content;
var parser = new HtmlParser();
var dom = parser.ParseDocument(response.Content);
try
{
var firstPageRows = FindTorrentRows();
// Add them to torrents list
torrentRowList.AddRange(firstPageRows.Select(fRow => fRow.Cq()));
var firstPageRows = FindTorrentRows(dom);
// If pagination available
int nbResults;
int pageLinkCount;
nbResults = 1;
pageLinkCount = 1;
// Check if we have a minimum of one result
if (firstPageRows.Length > 1)
if (firstPageRows?.Length > 1)
{
// Retrieve total count on our alone page
nbResults = firstPageRows.Count();
nbResults = firstPageRows.Length;
}
else
{
// Check if no result
if (torrentRowList.Count == 0)
{
// No results found
Output("\nNo result found for your query, please try another search term or change the theme you're currently using on the site as this is an unsupported solution...\n", "info");
// No result found for this query
break;
}
// No result found for this query
Output("\nNo result found for your query, please try another search term or change the theme you're currently using on the site as this is an unsupported solution...\n", "info");
break;
}
Output("\nFound " + nbResults + " result(s) (+/- " + firstPageRows.Length + ") in " + pageLinkCount + " page(s) for this query !");
@ -332,47 +347,47 @@ namespace Jackett.Common.Indexers
// Loop on results
foreach (var tRow in torrentRowList.Skip(1).Take(torrentRowList.Count - 2))
foreach (var row in firstPageRows.Skip(1).Take(firstPageRows.Length - 2))
{
Output("Torrent #" + (releases.Count + 1));
// ID
var idOrig = tRow.Find("td:eq(1) > a:eq(0)").Attr("href").Split('=')[1];
var idOrig = row.QuerySelector("td:nth-of-type(2) > a:nth-of-type(1)").GetAttribute("href").Split('=')[1];
var id = idOrig.Substring(0, idOrig.Length - 4);
Output("ID: " + id);
// Release Name
var name = tRow.Find("td:eq(1) > a:eq(0)").Text();
var name = row.QuerySelector("td:nth-of-type(2) > a:nth-of-type(1)").TextContent;
// Category
var categoryID = tRow.Find("td:eq(0) > a:eq(0)").Attr("href").Split('?').Last();
var newznab = MapTrackerCatToNewznab(categoryID);
Output("Category: " + (newznab.Count > 0 ? newznab.First().ToString() : "unknown category") + " (" + categoryID + ")");
var categoryId = row.QuerySelector("td:nth-of-type(1) > a:nth-of-type(1)").GetAttribute("href").Split('?').Last();
var newznab = MapTrackerCatToNewznab(categoryId);
Output("Category: " + (newznab.Count > 0 ? newznab.First().ToString() : "unknown category") + " (" + categoryId + ")");
// Seeders
var seeders = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(9)").Text(), @"\d+").Value);
var seeders = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(10)").TextContent, @"\d+").Value);
Output("Seeders: " + seeders);
// Leechers
var leechers = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(10)").Text(), @"\d+").Value);
var leechers = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(11)").TextContent, @"\d+").Value);
Output("Leechers: " + leechers);
// Files
var files = 1;
files = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(4)").Text(), @"\d+").Value);
files = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(5)").TextContent, @"\d+").Value);
Output("Files: " + files);
// Completed
var completed = ParseUtil.CoerceInt(Regex.Match(tRow.Find("td:eq(8)").Text(), @"\d+").Value);
var completed = ParseUtil.CoerceInt(Regex.Match(row.QuerySelector("td:nth-of-type(9)").TextContent, @"\d+").Value);
Output("Completed: " + completed);
// Size
var humanSize = tRow.Find("td:eq(7)").Text().ToLowerInvariant();
var humanSize = row.QuerySelector("td:nth-of-type(8)").TextContent.ToLowerInvariant();
var size = ReleaseInfo.GetBytes(humanSize);
Output("Size: " + humanSize + " (" + size + " bytes)");
// Publish DateToString
var dateTimeOrig = tRow.Find("td:eq(6)").Text();
var dateTimeOrig = row.QuerySelector("td:nth-of-type(7)").TextContent;
var datestr = Regex.Replace(dateTimeOrig, @"<[^>]+>|&nbsp;", "").Trim();
datestr = Regex.Replace(datestr, "Today", DateTime.Now.ToString("MMM dd yyyy"), RegexOptions.IgnoreCase);
datestr = Regex.Replace(datestr, "Yesterday", DateTime.Now.Date.AddDays(-1).ToString("MMM dd yyyy"), RegexOptions.IgnoreCase);
@ -388,7 +403,7 @@ namespace Jackett.Common.Indexers
Output("Comments Link: " + commentsLink.AbsoluteUri);
// Torrent Download URL
var passkey = tRow.Find("td:eq(2) > a:eq(0)").Attr("href");
var passkey = row.QuerySelector("td:nth-of-type(3) > a:nth-of-type(1)").GetAttribute("href");
var key = Regex.Match(passkey, "(?<=torrent_pass\\=)([a-zA-z0-9]*)");
var downloadLink = new Uri(TorrentDownloadUrl.Replace("{id}", id.ToString()).Replace("{passkey}", key.ToString()));
Output("Download Link: " + downloadLink.AbsoluteUri);
@ -412,14 +427,14 @@ namespace Jackett.Common.Indexers
};
// IMDB
var imdbLink = tRow.Find("a[href*=\"imdb.com/title/tt\"]").First().Attr("href");
var imdbLink = row.QuerySelector("a[href*=\"imdb.com/title/tt\"]")?.GetAttribute("href");
release.Imdb = ParseUtil.GetLongFromString(imdbLink);
if (tRow.Find("img[title=\"Free Torrent\"]").Length >= 1)
if (row.QuerySelector("img[title=\"Free Torrent\"]") != null)
release.DownloadVolumeFactor = 0;
else if (tRow.Find("img[title=\"Halfleech\"]").Length >= 1)
else if (row.QuerySelector("img[title=\"Halfleech\"]") != null)
release.DownloadVolumeFactor = 0.5;
else if (tRow.Find("img[title=\"90% Freeleech\"]").Length >= 1)
else if (row.QuerySelector("img[title=\"90% Freeleech\"]") != null)
release.DownloadVolumeFactor = 0.1;
else
release.DownloadVolumeFactor = 1;
@ -481,7 +496,7 @@ namespace Jackett.Common.Indexers
categoriesList[i] = categoriesList[i].Replace("cat=", "cats5[]=");
}
// Books
if (new[] { "54", "9" }.Any(c => categoriesList[i].Contains(categoriesList[i])))
if (new[] { "54", "9", "84" }.Any(c => categoriesList[i].Contains(categoriesList[i])))
{
categoriesList[i] = categoriesList[i].Replace("cat=", "cats6[]=");
}
@ -491,7 +506,7 @@ namespace Jackett.Common.Indexers
categoriesList[i] = categoriesList[i].Replace("cat=", "cats3[]=");
}
// Movies
if (new[] { "35", "42", "47", "15", "58", "16", "6", "21", "19", "22", "20", "25", "10", "23", "65" }.Any(c => categoriesList[i].Contains(categoriesList[i])))
if (new[] { "35", "42", "47", "15", "58", "16", "6", "21", "19", "22", "20", "25", "10", "23", "65", "90" }.Any(c => categoriesList[i].Contains(categoriesList[i])))
{
categoriesList[i] = categoriesList[i].Replace("cat=", "cats1[]=");
}
@ -501,7 +516,7 @@ namespace Jackett.Common.Indexers
categoriesList[i] = categoriesList[i].Replace("cat=", "cats4[]=");
}
// Series
if (new[] { "48", "57", "11", "7", "31", "30", "32", "5", "66" }.Any(c => categoriesList[i].Contains(categoriesList[i])))
if (new[] { "48", "57", "11", "80", "71", "72", "73", "83", "77", "78", "79", "89", "86", "87", "88", "82", "74", "75", "76", "81", "68", "69", "70", "91", "7", "31", "30", "32", "5", "66" }.Any(c => categoriesList[i].Contains(categoriesList[i])))
{
categoriesList[i] = categoriesList[i].Replace("cat=", "cats2[]=");
}
@ -674,35 +689,32 @@ namespace Jackett.Common.Indexers
/// <summary>
/// Find torrent rows in search pages
/// </summary>
/// <returns>JQuery Object</returns>
private CQ FindTorrentRows()
/// <returns>List of rows</returns>
private IHtmlCollection<IElement> FindTorrentRows(IHtmlDocument dom)
{
var defaultTheme = new[] { "/templates/1/", "/templates/2/", "/templates/3/", "/templates/4/", "/templates/5/", "/templates/6/", "/templates/11/", "/templates/12/" };
var oldV2 = new[] { "/templates/7/", "/templates/8/", "/templates/9/", "/templates/10/" };
var xmas = new[] { "/templates/14/" };
if (xmas.Any(_fDom.Document.Body.InnerHTML.Contains))
if (xmas.Any(dom.Body.InnerHtml.Contains))
{
// Return all occurencis of torrents found
// $('#base_around > table.mainouter > tbody > tr > td.outer > div.article > table > tbody:not(:first) > tr')
return _fDom["#base_around > table.mainouter > tbody > tr > td.outer > div.article > table > tbody:not(:first) > tr"];
// Return all occurrences of torrents found
return dom.QuerySelectorAll("#base_around > table.mainouter > tbody > tr > td.outer > div.article > table > tbody")[1].QuerySelectorAll("tr");
}
// template 7 contains a reference to template 2 (logout button), so check for oldV2 first
if (oldV2.Any(_fDom.Document.Body.InnerHTML.Contains))
if (oldV2.Any(dom.Body.InnerHtml.Contains))
{
// Return all occurencis of torrents found
// $('#base_content > table.mainouter > tbody > tr > td.outer > div.article > table > tbody > tr:not(:first)')
return _fDom["# base_content > table.mainouter > tbody > tr > td.outer > div.article > table > tbody > tr:not(:first)"];
// Return all occurrences of torrents found
return dom.QuerySelectorAll("#base_content > table.mainouter > tbody > tr > td.outer > div.article > table > tbody > tr").Skip(1).ToCollection();
}
if (defaultTheme.Any(_fDom.Document.Body.InnerHTML.Contains))
if (defaultTheme.Any(dom.Body.InnerHtml.Contains))
{
// Return all occurencis of torrents found
// $('#base_content2 > div.article > table > tbody:not(:first) > tr')
return _fDom["# base_content2 > div.article > table > tbody:not(:first) > tr"];
// Return all occurrences of torrents found
return dom.QuerySelectorAll("#base_content2 > div.article > table > tbody")[1].QuerySelectorAll("tr");
}
return _fDom;
return null;
}
/// <summary>