From 42b6aaedc9ef5f640325d00b794eaef3fa25ac3f Mon Sep 17 00:00:00 2001
From: 6cUbi57z <3359745+6cUbi57z@users.noreply.github.com>
Date: Sun, 18 Apr 2021 16:56:34 +0100
Subject: [PATCH] erairaws: add new public tracker. resolves #4116 (#11484)

Also change release size for ShowRSS to 512MB.

Co-authored-by: 6cUbi57z
Co-authored-by: ilike2burnthing <59480337+ilike2burnthing@users.noreply.github.com>
---
 README.md                                     |   1 +
 src/Jackett.Common/Indexers/EraiRaws.cs       | 391 ++++++++++++++++++
 src/Jackett.Common/Indexers/ShowRSS.cs        |   2 +-
 .../Common/Indexers/EraiRawsTests.cs          |  34 ++
 4 files changed, 427 insertions(+), 1 deletion(-)
 create mode 100644 src/Jackett.Common/Indexers/EraiRaws.cs
 create mode 100644 src/Jackett.Test/Common/Indexers/EraiRawsTests.cs

diff --git a/README.md b/README.md
index 8c6b5fd8b..6b2d07ac2 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,7 @@ A third-party Golang SDK for Jackett is available from [webtor-io/go-jackett](ht
  * E-Hentai
  * emtrek
  * Epizod
+ * Erai-Raws
  * ETTV
  * EXT Torrents
  * ExtraTorrent.cd
diff --git a/src/Jackett.Common/Indexers/EraiRaws.cs b/src/Jackett.Common/Indexers/EraiRaws.cs
new file mode 100644
index 000000000..8c7478383
--- /dev/null
+++ b/src/Jackett.Common/Indexers/EraiRaws.cs
@@ -0,0 +1,391 @@
+using System;
+using System.Text;
+using System.Text.RegularExpressions;
+using System.Collections.Generic;
+using System.Threading.Tasks;
+using System.Linq;
+using System.Xml;
+using Jackett.Common.Models;
+using Jackett.Common.Models.IndexerConfig;
+using Jackett.Common.Services.Interfaces;
+using Newtonsoft.Json.Linq;
+using NLog;
+using static Jackett.Common.Models.IndexerConfig.ConfigurationData;
+
+namespace Jackett.Common.Indexers
+{
+    /// <summary>
+    /// Public indexer for Erai-Raws that builds its releases from the site's magnet RSS feed.
+    /// </summary>
+    public class EraiRaws : BaseWebIndexer
+    {
+        const string RSS_PATH = "rss-all-magnet";
+
+        // Shared upper bound for every regex run against a feed title. The patterns are simple, so this is
+        // only a safety net; it is deliberately generous so a busy host cannot trigger spurious timeouts.
+        private static readonly TimeSpan RegexTimeout = TimeSpan.FromSeconds(1);
+
+        // Estimated release size in MiB per quality. Keys are matched case-insensitively because the
+        // quality regex itself ignores case.
+        private readonly IReadOnlyDictionary<string, long> sizeEstimates = new Dictionary<string, long>(StringComparer.OrdinalIgnoreCase) {
+            { "1080p", 1332 }, // ~1.3GiB
+            { "720p", 700 },
+            { "540p", 350 }
+        };
+
+        public EraiRaws(IIndexerConfigurationService configService, Utils.Clients.WebClient wc, Logger l,
+            IProtectionService ps, ICacheService cs)
+            : base(id: "erai-raws",
+                   name: "Erai-Raws",
+                   description: "Erai-Raws is a team release site for Anime subtitles.",
+                   link: "https://www.erai-raws.info/",
+                   caps: new TorznabCapabilities
+                   {
+                       TvSearchParams = new List<TvSearchParam>
+                       {
+                           TvSearchParam.Q
+                       }
+                   },
+                   configService: configService,
+                   client: wc,
+                   logger: l,
+                   p: ps,
+                   cacheService: cs,
+                   configData: new ConfigurationData())
+        {
+            Encoding = Encoding.UTF8;
+            Language = "en-us";
+            Type = "public";
+
+            // Add note that download stats are not available
+            configData.AddDynamic(
+                "download-stats-unavailable",
+                new DisplayInfoConfigurationItem("", "<p>Please note that the following stats are not available for this indexer. Default values are used instead.</p><ul><li>Size</li><li>Seeders</li><li>Leechers</li><li>Download Factor</li><li>Upload Factor</li></ul>")
+            );
+
+            // Config item for title detail parsing
+            configData.AddDynamic("title-detail-parsing", new BoolConfigurationItem("Enable Title Detail Parsing"));
+            configData.AddDynamic(
+                "title-detail-parsing-help",
+                new DisplayInfoConfigurationItem("", "Title Detail Parsing will attempt to determine the season and episode number from the release names and reformat them as a suffix in the format S1E1. If successful, this should provide better matching in applications such as Sonarr.")
+            );
+
+            // Configure the category mappings
+            AddCategoryMapping(1, TorznabCatType.TVAnime, "Anime - Sub");
+        }
+
+        private TitleParser titleParser = new TitleParser();
+
+        private bool IsTitleDetailParsingEnabled => ((BoolConfigurationItem)configData.GetDynamic("title-detail-parsing")).Value;
+
+        public string RssFeedUri
+        {
+            get
+            {
+                return string.Concat(SiteLink, RSS_PATH);
+            }
+        }
+
+        public override async Task<IndexerConfigurationStatus> ApplyConfiguration(JToken configJson)
+        {
+            LoadValuesFromJson(configJson);
+            var releases = await PerformQuery(new TorznabQuery());
+
+            await ConfigureIfOK(string.Empty, releases.Any(), () =>
+                throw new Exception("Could not find releases from this URL"));
+
+            return IndexerConfigurationStatus.Completed;
+        }
+
+        protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
+        {
+            var feedItems = await GetItemsFromFeed();
+            var eraiRawsReleaseInfo = ConvertFeedItemsToEraiRawsReleaseInfo(feedItems);
+
+            // Perform basic filter within Jackett
+            var filteredItems = FilterForQuery(query, eraiRawsReleaseInfo);
+
+            // Convert to release info
+            return ConvertEraiRawsInfoToJackettInfo(filteredItems);
+        }
+
+        /// <summary>
+        /// Downloads the RSS feed and parses every "item" element; items that fail to parse are logged and skipped.
+        /// </summary>
+        private async Task<IEnumerable<RssFeedItem>> GetItemsFromFeed()
+        {
+            // Retrieve RSS feed
+            var result = await RequestWithCookiesAndRetryAsync(RssFeedUri);
+
+            // Parse as XML document
+            var xmlDocument = new XmlDocument();
+            xmlDocument.LoadXml(result.ContentString);
+
+            // Parse to RssFeedItems
+            var xmlNodes = xmlDocument.GetElementsByTagName("item");
+            List<RssFeedItem> feedItems = new List<RssFeedItem>();
+            foreach (XmlNode node in xmlNodes)
+            {
+                if (RssFeedItem.TryParse(node, out RssFeedItem item))
+                {
+                    feedItems.Add(item);
+                }
+                else
+                {
+                    logger.Warn($"Could not parse {DisplayName} RSS item '{node.InnerText}'");
+                }
+            }
+
+            return feedItems;
+        }
+
+        private IEnumerable<EraiRawsReleaseInfo> ConvertFeedItemsToEraiRawsReleaseInfo(IEnumerable<RssFeedItem> feedItems)
+        {
+            foreach (var fi in feedItems)
+            {
+                EraiRawsReleaseInfo releaseInfo = new EraiRawsReleaseInfo(fi);
+
+                // Validate the release
+                if (releaseInfo.PublishDate == null)
+                {
+                    logger.Warn($"Failed to parse {DisplayName} RSS feed item '{fi.Title}' due to malformed publish date.");
+                    continue;
+                }
+
+                if (releaseInfo.Link == null)
+                {
+                    logger.Warn($"Failed to parse {DisplayName} RSS feed item '{fi.Title}' due to malformed link URI.");
+                    continue;
+                }
+
+                // If enabled, perform detailed title parsing
+                if (IsTitleDetailParsingEnabled)
+                {
+                    releaseInfo.Title = titleParser.Parse(releaseInfo.Title);
+                }
+
+                yield return releaseInfo;
+            }
+        }
+
+        private static IEnumerable<EraiRawsReleaseInfo> FilterForQuery(TorznabQuery query, IEnumerable<EraiRawsReleaseInfo> feedItems)
+        {
+            foreach (var fi in feedItems)
+            {
+                if (!query.MatchQueryStringAND(fi.Title))
+                    continue;
+
+                yield return fi;
+            }
+        }
+
+        private IEnumerable<ReleaseInfo> ConvertEraiRawsInfoToJackettInfo(IEnumerable<EraiRawsReleaseInfo> feedItems)
+        {
+            foreach (var fi in feedItems)
+            {
+                yield return new ReleaseInfo
+                {
+                    Title = string.Concat(fi.Title, " - ", fi.Quality),
+                    Guid = fi.Link,
+                    MagnetUri = fi.Link,
+                    PublishDate = fi.PublishDate.Value.ToLocalTime().DateTime,
+                    Category = MapTrackerCatToNewznab("1"),
+
+                    // Download stats are not available through scraping so set some mock values.
+                    Size = GetSizeEstimate(fi),
+                    Seeders = 1,
+                    Peers = 2,
+                    DownloadVolumeFactor = 0,
+                    UploadVolumeFactor = 1
+                };
+            }
+        }
+
+        /// <summary>
+        /// Get an estimate of the file size based on the release info.
+        /// </summary>
+        /// <remarks>
+        /// These estimates are currently only based on Quality. They will be very inaccurate for batch releases.
+        /// </remarks>
+        private long GetSizeEstimate(EraiRawsReleaseInfo releaseInfo)
+        {
+            // Fall back to 256 MiB when the quality is empty or has no estimate.
+            if (!sizeEstimates.TryGetValue(releaseInfo.Quality, out var sizeEstimateInMiB))
+            {
+                sizeEstimateInMiB = 256;
+            }
+
+            // Convert to bytes and return
+            return sizeEstimateInMiB * (1024 * 1024);
+        }
+
+        private static string PrefixOrDefault(string prefix, string value, string def = "")
+        {
+            if (string.IsNullOrWhiteSpace(value))
+            {
+                return def;
+            }
+            else
+            {
+                return string.Concat(prefix, value);
+            }
+        }
+
+        /// <summary>
+        /// Raw RSS feed item containing the data as received.
+        /// </summary>
+        private class RssFeedItem
+        {
+            public static bool TryParse(XmlNode rssItem, out RssFeedItem item)
+            {
+                var title = rssItem.SelectSingleNode("title")?.InnerText;
+                var link = rssItem.SelectSingleNode("link")?.InnerText;
+                var publishDate = rssItem.SelectSingleNode("pubDate")?.InnerText;
+
+                if (string.IsNullOrWhiteSpace(title) ||
+                    string.IsNullOrWhiteSpace(link) ||
+                    string.IsNullOrWhiteSpace(publishDate))
+                {
+                    // One of the properties was empty so fail to parse
+                    item = null;
+                    return false;
+                }
+
+                item = new RssFeedItem(title, link, publishDate);
+                return true;
+            }
+
+            private RssFeedItem(string title, string link, string publishDate)
+            {
+                Title = title;
+                Link = link;
+                PublishDate = publishDate;
+            }
+
+            public string Title { get; set; }
+
+            public string Link { get; }
+
+            public string PublishDate { get; }
+        }
+
+        /// <summary>
+        /// Details of an EraiRaws release
+        /// </summary>
+        private class EraiRawsReleaseInfo
+        {
+            public EraiRawsReleaseInfo(RssFeedItem feedItem)
+            {
+                var splitTitle = SplitQualityAndTitle(feedItem.Title);
+
+                Quality = splitTitle.quality;
+                Title = splitTitle.title;
+
+                if (Uri.TryCreate(feedItem.Link, UriKind.Absolute, out Uri magnetUri))
+                {
+                    Link = magnetUri;
+                }
+
+                if (DateTimeOffset.TryParse(feedItem.PublishDate, out DateTimeOffset publishDate))
+                {
+                    PublishDate = publishDate;
+                }
+            }
+
+            private (string quality, string title) SplitQualityAndTitle(string rawTitle)
+            {
+                var match = Regex.Match(rawTitle, @"^\[(?<quality>[0-9]+[ip])\] (?<title>.*)$", RegexOptions.IgnoreCase, RegexTimeout);
+                if (match.Success)
+                {
+                    return (match.Groups["quality"].Value, match.Groups["title"].Value);
+                }
+
+                return (string.Empty, rawTitle);
+            }
+
+            public string Quality { get; }
+
+            public string Title { get; set; }
+
+            public Uri Link { get; }
+
+            public DateTimeOffset? PublishDate { get; }
+        }
+
+        /// <summary>
+        /// Moves the season and episode numbers found in a release title into a single S1E1 style identifier.
+        /// </summary>
+        public class TitleParser
+        {
+            // Season patterns paired with the text that replaces the match. They are tried in the order
+            // listed and the first match wins, so the explicit forms come before the bare number form.
+            private static readonly (string pattern, string replacement)[] DETAIL_SEARCH_SEASON =
+            {
+                (" Season (?<detail>[0-9]+)", ""), // "Season 2"
+                (" (?<detail>[0-9]+)(st|nd|rd|th) Season", ""), // "2nd Season"
+                (" Part (?<detail>[0-9]+) – ", " – "), // "<title> Part 2 – <episode>"
+                (" (?<detail>[0-9]+) – ", " – ") // "<title> 2 – <episode>" - NOT A HYPHEN!
+            };
+
+            private static readonly (string pattern, string replacement)[] DETAIL_SEARCH_EPISODE =
+            {
+                (" – (?<detail>[0-9]+)$", " – "), // "<title> – <episode>" <end_of_title> - NOT A HYPHEN!
+                (" – (?<detail>[0-9]+) ", " – ") // "<title> – <episode> ..." - NOT A HYPHEN!
+            };
+
+            /// <summary>
+            /// Returns the title with any detected season/episode numbers rewritten as an S1E1 style identifier.
+            /// </summary>
+            public string Parse(string title)
+            {
+                // The episode is extracted first, then the season is searched for in what is left of the title.
+                var (withoutEpisode, episode) = SearchTitleForDetail(title, DETAIL_SEARCH_EPISODE);
+                var (strippedTitle, season) = SearchTitleForDetail(withoutEpisode, DETAIL_SEARCH_SEASON);
+
+                var seasonEpisodeIdentifier = string.Concat(
+                    PrefixOrDefault("S", season).Trim(),
+                    PrefixOrDefault("E", episode).Trim()
+                );
+
+                // If title still contains the strange hyphen, insert the identifier after it. Otherwise put it at the end.
+                int strangeHyphenPosition = strippedTitle.LastIndexOf('–');
+                if (strangeHyphenPosition > -1)
+                {
+                    return string.Concat(
+                        strippedTitle.Substring(0, strangeHyphenPosition).Trim(),
+                        " – ",
+                        seasonEpisodeIdentifier,
+                        " ",
+                        strippedTitle.Substring(strangeHyphenPosition + 1).Trim()
+                    ).Trim();
+                }
+
+                return string.Concat(
+                    strippedTitle.Trim(),
+                    " ",
+                    seasonEpisodeIdentifier
+                ).Trim();
+            }
+
+            /// <summary>
+            /// Tries each pattern in order; on the first match returns the captured "detail" group and the title with the pattern replaced.
+            /// </summary>
+            private static (string strippedTitle, string detail) SearchTitleForDetail(string title, (string pattern, string replacement)[] searchReplacePatterns)
+            {
+                foreach (var (pattern, replacement) in searchReplacePatterns)
+                {
+                    var match = Regex.Match(title, pattern, RegexOptions.IgnoreCase, RegexTimeout);
+                    if (match.Success)
+                    {
+                        string detail = match.Groups["detail"].Value;
+                        var strippedTitle = Regex.Replace(title, pattern, replacement, RegexOptions.IgnoreCase, RegexTimeout);
+                        return (strippedTitle, detail);
+                    }
+                }
+
+                // Nothing found so return the title unchanged with an empty detail
+                return (title, "");
+            }
+        }
+    }
+}
diff --git a/src/Jackett.Common/Indexers/ShowRSS.cs b/src/Jackett.Common/Indexers/ShowRSS.cs
index 09fe34a99..6a60880c5 100644
--- a/src/Jackett.Common/Indexers/ShowRSS.cs
+++ b/src/Jackett.Common/Indexers/ShowRSS.cs
@@ -102,7 +102,7 @@ namespace Jackett.Common.Indexers
                     PublishDate = publishDate,
                     InfoHash = infoHash,
                     MagnetUri = magnetUri,
-                    Size = 0,
+                    Size = 512 * 1024 * 1024, // 512 MiB expressed in bytes
                     Seeders = 1,
                     Peers = 2,
                     DownloadVolumeFactor = 0,
diff --git a/src/Jackett.Test/Common/Indexers/EraiRawsTests.cs b/src/Jackett.Test/Common/Indexers/EraiRawsTests.cs
new file mode 100644
index 000000000..66dca1ac4
--- /dev/null
+++ b/src/Jackett.Test/Common/Indexers/EraiRawsTests.cs
@@ -0,0 +1,34 @@
+using System;
+using System.Collections;
+using Jackett.Common.Indexers;
+using NUnit.Framework;
+using Assert = NUnit.Framework.Assert;
+
+namespace Jackett.Test.Common.Indexers
+{
+    [TestFixture]
+    public class EraiRawsTests
+    {
+        [TestCaseSource(typeof(TitleParserTestData), nameof(TitleParserTestData.TestCases))]
+        public string TestTitleParsing(string title)
+        {
+            var titleParser = new EraiRaws.TitleParser();
+            return titleParser.Parse(title);
+        }
+    }
+
+    public class TitleParserTestData
+    {
+        public static IEnumerable TestCases
+        {
+            get
+            {
+                yield return new TestCaseData("[1080p] Tokyo Revengers").Returns("[1080p] Tokyo Revengers");
+                yield return new TestCaseData("[1080p] Tokyo Revengers – 02").Returns("[1080p] Tokyo Revengers – E02");
+                yield return new TestCaseData("[1080p] Mairimashita! Iruma-kun 2nd Season – 01").Returns("[1080p] Mairimashita! Iruma-kun – S2E01");
+                yield return new TestCaseData("[540p] Seijo no Maryoku wa Bannou Desu – 02 v2 (Multi)").Returns("[540p] Seijo no Maryoku wa Bannou Desu – E02 v2 (Multi)");
+                yield return new TestCaseData("[1080p] Yuukoku no Moriarty Part 2 – 01 (Multi)").Returns("[1080p] Yuukoku no Moriarty – S2E01 (Multi)");
+            }
+        }
+    }
+}