mirror of https://github.com/Jackett/Jackett
rutracker/toloka: improve title cleaning (#13944)
This commit is contained in:
parent
470b18d664
commit
14bcfcc018
|
@ -22,24 +22,21 @@ namespace Jackett.Common.Indexers
|
|||
[ExcludeFromCodeCoverage]
|
||||
public class RuTracker : BaseWebIndexer
|
||||
{
|
||||
public override string[] AlternativeSiteLinks { get; protected set; } = {
|
||||
"https://rutracker.org/",
|
||||
"https://rutracker.net/",
|
||||
"https://rutracker.nl/"
|
||||
};
|
||||
private new ConfigurationDataRutracker configData => (ConfigurationDataRutracker)base.configData;
|
||||
|
||||
private readonly TitleParser _titleParser = new TitleParser();
|
||||
private string LoginUrl => SiteLink + "forum/login.php";
|
||||
private string SearchUrl => SiteLink + "forum/tracker.php";
|
||||
|
||||
private string _capSid;
|
||||
private string _capCodeField;
|
||||
|
||||
private new ConfigurationDataRutracker configData => (ConfigurationDataRutracker)base.configData;
|
||||
|
||||
public override string[] AlternativeSiteLinks { get; protected set; } = {
|
||||
"https://rutracker.org/",
|
||||
"https://rutracker.net/",
|
||||
"https://rutracker.nl/"
|
||||
};
|
||||
|
||||
private Regex _regexToFindTagsInReleaseTitle = new Regex(@"\[[^\[]+\]|\([^(]+\)");
|
||||
|
||||
public RuTracker(IIndexerConfigurationService configService, WebClient wc, Logger l, IProtectionService ps,
|
||||
ICacheService cs)
|
||||
public RuTracker(IIndexerConfigurationService configService, WebClient wc, Logger l, IProtectionService ps, ICacheService cs)
|
||||
: base(id: "rutracker",
|
||||
name: "RuTracker",
|
||||
description: "RuTracker is a Semi-Private Russian torrent site with a thriving file-sharing community",
|
||||
|
@ -74,6 +71,7 @@ namespace Jackett.Common.Indexers
|
|||
Encoding = Encoding.GetEncoding("windows-1251");
|
||||
Language = "ru-RU";
|
||||
Type = "semi-private";
|
||||
|
||||
// note: when refreshing the categories use the tracker.php page and NOT the search.php page!
|
||||
AddCategoryMapping(22, TorznabCatType.Movies, "Наше кино");
|
||||
AddCategoryMapping(941, TorznabCatType.Movies, "|- Кино СССР");
|
||||
|
@ -1389,17 +1387,15 @@ namespace Jackett.Common.Indexers
|
|||
var response = await RequestWithCookiesAsync(LoginUrl);
|
||||
var parser = new HtmlParser();
|
||||
var doc = parser.ParseDocument(response.ContentString);
|
||||
var captchaimg = doc.QuerySelector("img[src^=\"https://static.t-ru.org/captcha/\"]");
|
||||
var captchaimg = doc.QuerySelector("img[src^=\"https://static.rutracker.cc/captcha/\"]");
|
||||
|
||||
if (captchaimg != null)
|
||||
{
|
||||
var captchaImage = await RequestWithCookiesAsync(captchaimg.GetAttribute("src"));
|
||||
configData.CaptchaImage.Value = captchaImage.ContentBytes;
|
||||
|
||||
var codefield = doc.QuerySelector("input[name^=\"cap_code_\"]");
|
||||
_capCodeField = codefield.GetAttribute("name");
|
||||
|
||||
var sidfield = doc.QuerySelector("input[name=\"cap_sid\"]");
|
||||
_capSid = sidfield.GetAttribute("value");
|
||||
_capCodeField = doc.QuerySelector("input[name^=\"cap_code_\"]")?.GetAttribute("name");
|
||||
_capSid = doc.QuerySelector("input[name=\"cap_sid\"]")?.GetAttribute("value");
|
||||
}
|
||||
else
|
||||
configData.CaptchaImage.Value = null;
|
||||
|
@ -1517,6 +1513,7 @@ namespace Jackett.Common.Indexers
|
|||
queryCollection.Add("f", string.Join(",", MapTorznabCapsToTrackers(query)));
|
||||
|
||||
var searchUrl = SearchUrl + "?" + queryCollection.GetQueryString();
|
||||
|
||||
return searchUrl;
|
||||
}
|
||||
|
||||
|
@ -1541,6 +1538,7 @@ namespace Jackett.Common.Indexers
|
|||
var qDetailsLink = row.QuerySelector("td.t-title-col > div.t-title > a.tLink");
|
||||
var details = new Uri(SiteLink + "forum/" + qDetailsLink.GetAttribute("href"));
|
||||
|
||||
var title = qDetailsLink.TextContent.Trim();
|
||||
var category = GetCategoryOfRelease(row);
|
||||
|
||||
var size = GetSizeOfRelease(row);
|
||||
|
@ -1556,7 +1554,14 @@ namespace Jackett.Common.Indexers
|
|||
{
|
||||
MinimumRatio = 1,
|
||||
MinimumSeedTime = 0,
|
||||
Title = qDetailsLink.TextContent,
|
||||
Title = _titleParser.Parse(
|
||||
title,
|
||||
category,
|
||||
configData.StripRussianLetters.Value,
|
||||
configData.MoveAllTagsToEndOfReleaseTitle.Value,
|
||||
configData.MoveFirstTagsToEndOfReleaseTitle.Value
|
||||
),
|
||||
Description = title,
|
||||
Details = details,
|
||||
Link = link,
|
||||
Guid = details,
|
||||
|
@ -1570,60 +1575,6 @@ namespace Jackett.Common.Indexers
|
|||
UploadVolumeFactor = 1
|
||||
};
|
||||
|
||||
// TODO finish extracting release variables to simplify release initialization
|
||||
if (IsAnyTvCategory(release.Category))
|
||||
{
|
||||
// extract season and episodes
|
||||
var regex = new Regex(".+\\/\\s([^а-яА-я\\/]+)\\s\\/.+Сезон\\s*[:]*\\s+(\\d+).+(?:Серии|Эпизод)+\\s*[:]*\\s+(\\d+-*\\d*).+,\\s+(.+)\\][\\s]?(.*)");
|
||||
|
||||
//replace double 4K quality in title
|
||||
release.Title = release.Title.Replace(", 4K]", "]");
|
||||
|
||||
var title = regex.Replace(release.Title, "$1 - S$2E$3 - rus $4 $5");
|
||||
title = Regex.Replace(title, "-Rip", "Rip", RegexOptions.IgnoreCase);
|
||||
title = Regex.Replace(title, "WEB-DLRip", "WEBDL", RegexOptions.IgnoreCase);
|
||||
title = Regex.Replace(title, "WEB-DL", "WEBDL", RegexOptions.IgnoreCase);
|
||||
title = Regex.Replace(title, "HDTVRip", "HDTV", RegexOptions.IgnoreCase);
|
||||
title = Regex.Replace(title, "Кураж-Бамбей", "kurazh", RegexOptions.IgnoreCase);
|
||||
|
||||
release.Title = title;
|
||||
}
|
||||
else if (IsAnyMovieCategory(release.Category))
|
||||
{
|
||||
// remove director's name from title
|
||||
// rutracker movies titles look like: russian name / english name (russian director / english director) other stuff
|
||||
// Ирландец / The Irishman (Мартин Скорсезе / Martin Scorsese) [2019, США, криминал, драма, биография, WEB-DL 1080p] Dub (Пифагор) + MVO (Jaskier) + AVO (Юрий Сербин) + Sub Rus, Eng + Original Eng
|
||||
// this part should be removed: (Мартин Скорсезе / Martin Scorsese)
|
||||
var director = new Regex(@"(\([А-Яа-яЁё\W]+)\s/\s(.+?)\)");
|
||||
release.Title = director.Replace(release.Title, "");
|
||||
|
||||
// Bluray quality fix: radarr parse Blu-ray Disc as Bluray-1080p but should be BR-DISK
|
||||
release.Title = Regex.Replace(release.Title, "Blu-ray Disc", "BR-DISK", RegexOptions.IgnoreCase);
|
||||
// language fix: all rutracker releases contains russian track
|
||||
if (release.Title.IndexOf("rus", StringComparison.OrdinalIgnoreCase) < 0)
|
||||
release.Title += " rus";
|
||||
}
|
||||
|
||||
if (configData.StripRussianLetters.Value)
|
||||
{
|
||||
var regex = new Regex(@"(\([А-Яа-яЁё\W]+\))|(^[А-Яа-яЁё\W\d]+\/ )|([а-яА-ЯЁё \-]+,+)|([а-яА-ЯЁё]+)");
|
||||
release.Title = regex.Replace(release.Title, "");
|
||||
}
|
||||
|
||||
if (configData.MoveAllTagsToEndOfReleaseTitle.Value)
|
||||
{
|
||||
release.Title = MoveAllTagsToEndOfReleaseTitle(release.Title);
|
||||
}
|
||||
else if (configData.MoveFirstTagsToEndOfReleaseTitle.Value)
|
||||
{
|
||||
release.Title = MoveFirstTagsToEndOfReleaseTitle(release.Title);
|
||||
}
|
||||
|
||||
if (release.Category.Contains(TorznabCatType.Audio.ID))
|
||||
{
|
||||
release.Title = DetectRereleaseInReleaseTitle(release.Title);
|
||||
}
|
||||
|
||||
return release;
|
||||
}
|
||||
catch (Exception ex)
|
||||
|
@ -1639,7 +1590,7 @@ namespace Jackett.Common.Indexers
|
|||
var qSeeders = row.QuerySelector("td:nth-child(7)");
|
||||
if (qSeeders != null && !qSeeders.TextContent.Contains("дн"))
|
||||
{
|
||||
var seedersString = qSeeders.QuerySelector("b").TextContent;
|
||||
var seedersString = qSeeders.QuerySelector("b")?.TextContent.Trim();
|
||||
if (!string.IsNullOrWhiteSpace(seedersString))
|
||||
seeders = ParseUtil.CoerceInt(seedersString);
|
||||
}
|
||||
|
@ -1648,107 +1599,186 @@ namespace Jackett.Common.Indexers
|
|||
|
||||
private ICollection<int> GetCategoryOfRelease(in IElement row)
|
||||
{
|
||||
var forum = row.QuerySelector("td.f-name-col > div.f-name > a");
|
||||
var forumid = forum.GetAttribute("href").Split('=')[1];
|
||||
return MapTrackerCatToNewznab(forumid);
|
||||
var forum = row.QuerySelector("td.f-name-col > div.f-name > a")?.GetAttribute("href");
|
||||
var cat = ParseUtil.GetArgumentFromQueryString(forum, "f");
|
||||
|
||||
return MapTrackerCatToNewznab(cat);
|
||||
}
|
||||
|
||||
private long GetSizeOfRelease(in IElement row)
|
||||
{
|
||||
var qSize = row.QuerySelector("td.tor-size");
|
||||
var size = ReleaseInfo.GetBytes(qSize.GetAttribute("data-ts_text"));
|
||||
return size;
|
||||
}
|
||||
/// <summary>
/// Reads the size of a release from the <c>data-ts_text</c> attribute of the row's <c>td.tor-size</c> cell.
/// </summary>
/// <param name="row">Search result row to read from.</param>
/// <returns>
/// Whatever <c>ReleaseInfo.GetBytes</c> produces for the attribute text; null is passed through
/// when the cell is not present in the row.
/// </returns>
private long GetSizeOfRelease(in IElement row)
{
    var sizeCell = row.QuerySelector("td.tor-size");
    var sizeText = sizeCell?.GetAttribute("data-ts_text");

    return ReleaseInfo.GetBytes(sizeText);
}
|
||||
|
||||
private DateTime GetPublishDateOfRelease(in IElement row)
|
||||
{
|
||||
var timestr = row.QuerySelector("td:nth-child(10)").GetAttribute("data-ts_text");
|
||||
var publishDate = DateTimeUtil.UnixTimestampToDateTime(long.Parse(timestr));
|
||||
return publishDate;
|
||||
}
|
||||
/// <summary>
/// Reads the publish date of a release from the <c>data-ts_text</c> attribute of the row's tenth cell.
/// </summary>
/// <param name="row">Search result row to read from.</param>
/// <returns>The attribute value parsed as an integer and converted by <c>DateTimeUtil.UnixTimestampToDateTime</c>.</returns>
/// <remarks>
/// The null-conditional lookup only avoids dereferencing a missing cell: <c>long.Parse</c> still throws
/// when it receives null or non-numeric text, so a row without the attribute ends in an exception.
/// </remarks>
private DateTime GetPublishDateOfRelease(in IElement row)
{
    var dateCell = row.QuerySelector("td:nth-child(10)");
    var timestamp = long.Parse(dateCell?.GetAttribute("data-ts_text"));

    return DateTimeUtil.UnixTimestampToDateTime(timestamp);
}
|
||||
|
||||
private bool IsAnyTvCategory(ICollection<int> category)
|
||||
public class TitleParser
|
||||
{
|
||||
return category.Contains(TorznabCatType.TV.ID)
|
||||
|| TorznabCatType.TV.SubCategories.Any(subCat => category.Contains(subCat.ID));
|
||||
}
|
||||
|
||||
private bool IsAnyMovieCategory(ICollection<int> category)
|
||||
{
|
||||
return category.Contains(TorznabCatType.Movies.ID)
|
||||
|| TorznabCatType.Movies.SubCategories.Any(subCat => category.Contains(subCat.ID));
|
||||
}
|
||||
|
||||
private string MoveAllTagsToEndOfReleaseTitle(string input)
|
||||
{
|
||||
var output = input + " ";
|
||||
foreach (Match match in _regexToFindTagsInReleaseTitle.Matches(input))
|
||||
private static readonly List<Regex> _FindTagsInTitlesRegexList = new List<Regex>
|
||||
{
|
||||
var tag = match.ToString();
|
||||
output = output.Replace(tag, "") + tag;
|
||||
}
|
||||
output = output.Trim();
|
||||
return output;
|
||||
}
|
||||
new Regex(@"\((?>\((?<c>)|[^()]+|\)(?<-c>))*(?(c)(?!))\)"),
|
||||
new Regex(@"\[(?>\[(?<c>)|[^\[\]]+|\](?<-c>))*(?(c)(?!))\]")
|
||||
};
|
||||
|
||||
private string MoveFirstTagsToEndOfReleaseTitle(string input)
|
||||
{
|
||||
var output = input + " ";
|
||||
var expectedIndex = 0;
|
||||
foreach (Match match in _regexToFindTagsInReleaseTitle.Matches(input))
|
||||
// Matches the Cyrillic parts of a title; Parse deletes every match when stripCyrillicLetters is set.
// Alternatives, tried in this order:
//   1. a "(...)" group whose content is only Cyrillic and non-word characters;
//   2. a run of Cyrillic, non-word and digit characters anchored at the start of the title and ending in "/ ";
//   3. a run of Cyrillic characters, spaces and hyphens followed by one or more commas;
//   4. any other run of Cyrillic characters.
private readonly Regex _stripCyrillicRegex = new Regex(@"(\([\p{IsCyrillic}\W]+\))|(^[\p{IsCyrillic}\W\d]+\/ )|([\p{IsCyrillic} \-]+,+)|([\p{IsCyrillic}]+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
// Matches two numbers joined by a comma and preceded by a whitespace character (e.g. " 1,2").
// For TV titles Parse replaces the match with " $1-$2", i.e. the comma becomes a hyphen.
private readonly Regex _tvTitleCommaRegex = new Regex(@"\s(\d+),(\d+)", RegexOptions.Compiled);
|
||||
// Matches a run of "Х" characters (the Cyrillic letter, going by the field name; case-insensitive) that
// follows whitespace or '-' and is followed by whitespace, ')' or ']'.
// For TV titles Parse replaces the run with "XX" and keeps the surrounding characters ($1 and $2).
private readonly Regex _tvTitleCyrillicXRegex = new Regex(@"([\s-])Х+([\s\)\]])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
// Matches "Сезон <n> ... Серии|Эпизод|Выпуски <e>[-<e2>] из <t>" (Russian: season / episodes / episode / issues / of).
// Groups: 1 = season number, 2 = episode number or range, 3 = the character after "из".
// Parse replaces the match with "S$1E$2 of $3".
// Group 3 is a single character ([\w?] has no quantifier), so for a multi-digit total only the first digit is
// part of the match; the remaining digits stay in the title directly after the replacement text.
private readonly Regex _tvTitleRusSeasonEpisodeOfRegex = new Regex(@"Сезон\s*[:]*\s+(\d+).+(?:Серии|Эпизод|Выпуски)+\s*[:]*\s+(\d+(?:-\d+)?)\s*из\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
// Matches "Сезон <n> ... Серии|Эпизод|Выпуски <e>[-<e2>]" without a trailing total.
// Groups: 1 = season number, 2 = episode number or range. Parse replaces the match with "S$1E$2".
private readonly Regex _tvTitleRusSeasonEpisodeRegex = new Regex(@"Сезон\s*[:]*\s+(\d+).+(?:Серии|Эпизод|Выпуски)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
// Matches a stand-alone "Сезон <n>" or "Сezon"-style range "Сезон <n>-<m>" (optional colons after the keyword).
// Group 1 = season number or range. Parse replaces the match with "S$1".
private readonly Regex _tvTitleRusSeasonRegex = new Regex(@"Сезон\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
// Matches "Серии|Эпизод|Выпуски <e>[-<e2>] из <t>" with no season in front.
// Groups: 1 = episode number or range, 2 = the single character after "из" ([\w?] has no quantifier).
// Parse replaces the match with "E$1 of $2".
private readonly Regex _tvTitleRusEpisodeOfRegex = new Regex(@"(?:Серии|Эпизод|Выпуски)+\s*[:]*\s+(\d+(?:-\d+)?)\s*из\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
// Matches a stand-alone "Серии|Эпизод|Выпуски <e>[-<e2>]" (optional colons after the keyword).
// Group 1 = episode number or range. Parse replaces the match with "E$1".
private readonly Regex _tvTitleRusEpisodeRegex = new Regex(@"(?:Серии|Эпизод|Выпуски)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
public string Parse(string title, ICollection<int> category, bool stripCyrillicLetters = true, bool moveAllTagsToEndOfReleaseTitle = false, bool moveFirstTagsToEndOfReleaseTitle = false)
|
||||
{
|
||||
if (match.Index > expectedIndex)
|
||||
// https://www.fileformat.info/info/unicode/category/Pd/list.htm
|
||||
title = Regex.Replace(title, @"\p{Pd}", "-", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
// replace double 4K quality in title
|
||||
title = Regex.Replace(title, @"\b(2160p), 4K\b", "$1", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
if (IsAnyTvCategory(category))
|
||||
{
|
||||
var substring = input.Substring(expectedIndex, match.Index - expectedIndex);
|
||||
if (string.IsNullOrWhiteSpace(substring))
|
||||
expectedIndex = match.Index;
|
||||
else
|
||||
break;
|
||||
title = _tvTitleCommaRegex.Replace(title, " $1-$2");
|
||||
title = _tvTitleCyrillicXRegex.Replace(title, "$1XX$2");
|
||||
|
||||
title = _tvTitleRusSeasonEpisodeOfRegex.Replace(title, "S$1E$2 of $3");
|
||||
title = _tvTitleRusSeasonEpisodeRegex.Replace(title, "S$1E$2");
|
||||
title = _tvTitleRusSeasonRegex.Replace(title, "S$1");
|
||||
title = _tvTitleRusEpisodeOfRegex.Replace(title, "E$1 of $2");
|
||||
title = _tvTitleRusEpisodeRegex.Replace(title, "E$1");
|
||||
}
|
||||
var tag = match.ToString();
|
||||
output = output.Replace(tag, "") + tag;
|
||||
expectedIndex += tag.Length;
|
||||
else if (IsAnyMovieCategory(category))
|
||||
{
|
||||
// remove director's name from title
|
||||
// rutracker movies titles look like: russian name / english name (russian director / english director) other stuff
|
||||
// Ирландец / The Irishman (Мартин Скорсезе / Martin Scorsese) [2019, США, криминал, драма, биография, WEB-DL 1080p] Dub (Пифагор) + MVO (Jaskier) + AVO (Юрий Сербин) + Sub Rus, Eng + Original Eng
|
||||
// this part should be removed: (Мартин Скорсезе / Martin Scorsese)
|
||||
title = Regex.Replace(title, @"(\([\p{IsCyrillic}\W]+)\s/\s(.+?)\)", string.Empty, RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
// Bluray quality fix: radarr parse Blu-ray Disc as Bluray-1080p but should be BR-DISK
|
||||
title = Regex.Replace(title, @"\bBlu-ray Disc\b", "BR-DISK", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
// language fix: all rutracker releases contains russian track
|
||||
if (title.IndexOf("rus", StringComparison.OrdinalIgnoreCase) < 0)
|
||||
title += " rus";
|
||||
}
|
||||
|
||||
if (stripCyrillicLetters)
|
||||
title = _stripCyrillicRegex.Replace(title, string.Empty).Trim(' ', '-');
|
||||
|
||||
if (moveAllTagsToEndOfReleaseTitle)
|
||||
title = MoveAllTagsToEndOfReleaseTitle(title);
|
||||
else if (moveFirstTagsToEndOfReleaseTitle)
|
||||
title = MoveFirstTagsToEndOfReleaseTitle(title);
|
||||
|
||||
if (IsAnyAudioCategory(category))
|
||||
title = DetectRereleaseInReleaseTitle(title);
|
||||
|
||||
title = Regex.Replace(title, @"\b-Rip\b", "Rip", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
title = Regex.Replace(title, @"\bHDTVRip\b", "HDTV", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
title = Regex.Replace(title, @"\bWEB-DLRip\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
title = Regex.Replace(title, @"\bWEBDLRip\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
title = Regex.Replace(title, @"\bWEBDL\b", "WEB-DL", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
title = Regex.Replace(title, @"\bКураж-Бамбей\b", "kurazh", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
title = Regex.Replace(title, @"\(\s*\/\s*", "(", RegexOptions.Compiled);
|
||||
title = Regex.Replace(title, @"\s*\/\s*\)", ")", RegexOptions.Compiled);
|
||||
|
||||
title = Regex.Replace(title, @"[\[\(]\s*[\)\]]", "", RegexOptions.Compiled);
|
||||
|
||||
title = title.Trim(' ', '&', ',', '.', '!', '?', '+', '-', '_', '|', '/', '\\', ':');
|
||||
|
||||
// replace multiple spaces with a single space
|
||||
title = Regex.Replace(title, @"\s+", " ");
|
||||
|
||||
return title.Trim();
|
||||
}
|
||||
output = output.Trim();
|
||||
return output;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Searches the release title to find a 'year1/year2' pattern that would indicate that this is a re-release of an old music album.
|
||||
/// If the release is found to be a re-release, this is added to the title as a new tag.
|
||||
/// Not to be confused with discographies; they mostly follow the 'year1-year2' pattern.
|
||||
/// </summary>
|
||||
private string DetectRereleaseInReleaseTitle(string input)
|
||||
{
|
||||
var fullTitle = input;
|
||||
/// <summary>
/// Tells whether the given category ids include the TV category or any of its sub-categories.
/// </summary>
/// <param name="category">Category ids assigned to a release.</param>
/// <returns>True when the TV id or the id of one of its sub-categories is present.</returns>
private static bool IsAnyTvCategory(ICollection<int> category)
{
    // The parent category is checked first; sub-categories are only scanned when it is absent.
    if (category.Contains(TorznabCatType.TV.ID))
        return true;

    return TorznabCatType.TV.SubCategories.Any(sub => category.Contains(sub.ID));
}
|
||||
|
||||
var squareBracketTags = input.FindSubstringsBetween('[', ']', includeOpeningAndClosing: true);
|
||||
input = input.RemoveSubstrings(squareBracketTags);
|
||||
/// <summary>
/// Tells whether the given category ids include the Movies category or any of its sub-categories.
/// </summary>
/// <param name="category">Category ids assigned to a release.</param>
/// <returns>True when the Movies id or the id of one of its sub-categories is present.</returns>
private static bool IsAnyMovieCategory(ICollection<int> category)
{
    // The parent category is checked first; sub-categories are only scanned when it is absent.
    if (category.Contains(TorznabCatType.Movies.ID))
        return true;

    return TorznabCatType.Movies.SubCategories.Any(sub => category.Contains(sub.ID));
}
|
||||
|
||||
var roundBracketTags = input.FindSubstringsBetween('(', ')', includeOpeningAndClosing: true);
|
||||
input = input.RemoveSubstrings(roundBracketTags);
|
||||
/// <summary>
/// Tells whether the given category ids include the Audio category or any of its sub-categories.
/// </summary>
/// <param name="category">Category ids assigned to a release.</param>
/// <returns>True when the Audio id or the id of one of its sub-categories is present.</returns>
private static bool IsAnyAudioCategory(ICollection<int> category)
{
    // The parent category is checked first; sub-categories are only scanned when it is absent.
    if (category.Contains(TorznabCatType.Audio.ID))
        return true;

    return TorznabCatType.Audio.SubCategories.Any(sub => category.Contains(sub.ID));
}
|
||||
|
||||
var regex = new Regex(@"\d{4}");
|
||||
var yearsInTitle = regex.Matches(input);
|
||||
|
||||
if (yearsInTitle == null || yearsInTitle.Count < 2)
|
||||
private static string MoveAllTagsToEndOfReleaseTitle(string input)
|
||||
{
|
||||
//Can only be a re-release if there's at least 2 years in the title.
|
||||
return fullTitle;
|
||||
var output = input;
|
||||
foreach (var findTagsRegex in _FindTagsInTitlesRegexList)
|
||||
{
|
||||
foreach (Match match in findTagsRegex.Matches(input))
|
||||
{
|
||||
var tag = match.ToString();
|
||||
output = $"{output.Replace(tag, "")} {tag}".Trim();
|
||||
}
|
||||
}
|
||||
|
||||
return output.Trim();
|
||||
}
|
||||
|
||||
regex = new Regex(@"(\d{4}) *\/ *(\d{4})");
|
||||
var regexMatch = regex.Match(input);
|
||||
if (!regexMatch.Success)
|
||||
private static string MoveFirstTagsToEndOfReleaseTitle(string input)
|
||||
{
|
||||
//Not in the expected format. Return the unaltered title.
|
||||
return fullTitle;
|
||||
var output = input;
|
||||
foreach (var findTagsRegex in _FindTagsInTitlesRegexList)
|
||||
{
|
||||
var expectedIndex = 0;
|
||||
foreach (Match match in findTagsRegex.Matches(output))
|
||||
{
|
||||
if (match.Index > expectedIndex)
|
||||
{
|
||||
var substring = output.Substring(expectedIndex, match.Index - expectedIndex);
|
||||
if (string.IsNullOrWhiteSpace(substring))
|
||||
expectedIndex = match.Index;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
var tag = match.ToString();
|
||||
var regex = new Regex(Regex.Escape(tag));
|
||||
output = $"{regex.Replace(output, string.Empty, 1)} {tag}".Trim();
|
||||
expectedIndex += tag.Length;
|
||||
}
|
||||
}
|
||||
|
||||
return output.Trim();
|
||||
}
|
||||
|
||||
var originalYear = regexMatch.Groups[1].ToString();
|
||||
fullTitle = fullTitle.Replace(regexMatch.ToString(), originalYear);
|
||||
/// <summary>
|
||||
/// Searches the release title to find a 'year1/year2' pattern that would indicate that this is a re-release of an old music album.
|
||||
/// If the release is found to be a re-release, this is added to the title as a new tag.
|
||||
/// Not to be confused with discographies; they mostly follow the 'year1-year2' pattern.
|
||||
/// </summary>
|
||||
private static string DetectRereleaseInReleaseTitle(string input)
|
||||
{
|
||||
var fullTitle = input;
|
||||
|
||||
return fullTitle + "(Re-release)";
|
||||
var squareBracketTags = input.FindSubstringsBetween('[', ']', includeOpeningAndClosing: true);
|
||||
input = input.RemoveSubstrings(squareBracketTags);
|
||||
|
||||
var roundBracketTags = input.FindSubstringsBetween('(', ')', includeOpeningAndClosing: true);
|
||||
input = input.RemoveSubstrings(roundBracketTags);
|
||||
|
||||
var regex = new Regex(@"\d{4}");
|
||||
var yearsInTitle = regex.Matches(input);
|
||||
|
||||
if (yearsInTitle == null || yearsInTitle.Count < 2)
|
||||
{
|
||||
//Can only be a re-release if there's at least 2 years in the title.
|
||||
return fullTitle;
|
||||
}
|
||||
|
||||
regex = new Regex(@"(\d{4}) *\/ *(\d{4})");
|
||||
var regexMatch = regex.Match(input);
|
||||
if (!regexMatch.Success)
|
||||
{
|
||||
//Not in the expected format. Return the unaltered title.
|
||||
return fullTitle;
|
||||
}
|
||||
|
||||
var originalYear = regexMatch.Groups[1].ToString();
|
||||
fullTitle = fullTitle.Replace(regexMatch.ToString(), originalYear);
|
||||
|
||||
return fullTitle + "(Re-release)";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
|
@ -9,29 +10,26 @@ using Jackett.Common.Models;
|
|||
using Jackett.Common.Models.IndexerConfig.Bespoke;
|
||||
using Jackett.Common.Services.Interfaces;
|
||||
using Jackett.Common.Utils;
|
||||
using Jackett.Common.Utils.Clients;
|
||||
using Newtonsoft.Json.Linq;
|
||||
using NLog;
|
||||
using WebClient = Jackett.Common.Utils.Clients.WebClient;
|
||||
|
||||
namespace Jackett.Common.Indexers
|
||||
{
|
||||
[ExcludeFromCodeCoverage]
|
||||
public class Toloka : BaseWebIndexer
|
||||
{
|
||||
private string LoginUrl => SiteLink + "/login.php";
|
||||
private string SearchUrl => SiteLink + "/tracker.php";
|
||||
|
||||
protected string cap_sid = null;
|
||||
protected string cap_code_field = null;
|
||||
|
||||
private new ConfigurationDataToloka configData
|
||||
{
|
||||
get => (ConfigurationDataToloka)base.configData;
|
||||
set => base.configData = value;
|
||||
}
|
||||
|
||||
public Toloka(IIndexerConfigurationService configService, WebClient wc, Logger l, IProtectionService ps,
|
||||
ICacheService cs)
|
||||
private readonly TitleParser _titleParser = new TitleParser();
|
||||
private string LoginUrl => SiteLink + "login.php";
|
||||
private string SearchUrl => SiteLink + "tracker.php";
|
||||
|
||||
public Toloka(IIndexerConfigurationService configService, WebClient wc, Logger l, IProtectionService ps, ICacheService cs)
|
||||
: base(id: "toloka",
|
||||
name: "Toloka.to",
|
||||
description: "Toloka is a Semi-Private Ukrainian torrent site with a thriving file-sharing community",
|
||||
|
@ -209,6 +207,7 @@ namespace Jackett.Common.Indexers
|
|||
{ "password", configData.Password.Value },
|
||||
{ "autologin", "on" },
|
||||
{ "ssl", "on" },
|
||||
{ "redirect", "" },
|
||||
{ "login", "Вхід" }
|
||||
};
|
||||
|
||||
|
@ -216,15 +215,14 @@ namespace Jackett.Common.Indexers
|
|||
await ConfigureIfOK(result.Cookies, result.ContentString != null && result.ContentString.Contains("logout=true"), () =>
|
||||
{
|
||||
logger.Debug(result.ContentString);
|
||||
var errorMessage = "Unknown error message, please report";
|
||||
var LoginResultParser = new HtmlParser();
|
||||
var LoginResultDocument = LoginResultParser.ParseDocument(result.ContentString);
|
||||
var errormsg = LoginResultDocument.QuerySelector("h4[class=\"warnColor1 tCenter mrg_16\"]");
|
||||
if (errormsg != null)
|
||||
errorMessage = errormsg.TextContent;
|
||||
|
||||
throw new ExceptionWithConfigData(errorMessage, configData);
|
||||
var loginResultParser = new HtmlParser();
|
||||
var loginResultDocument = loginResultParser.ParseDocument(result.ContentString);
|
||||
var errorMessage = loginResultDocument.QuerySelector("table.forumline table span.gen")?.FirstChild?.TextContent;
|
||||
|
||||
throw new ExceptionWithConfigData(errorMessage ?? "Unknown error message, please report.", configData);
|
||||
});
|
||||
|
||||
return IndexerConfigurationStatus.RequiresTesting;
|
||||
}
|
||||
|
||||
|
@ -241,16 +239,12 @@ namespace Jackett.Common.Indexers
|
|||
|
||||
// if the search string is empty use the getnew view
|
||||
if (string.IsNullOrWhiteSpace(searchString))
|
||||
{
|
||||
qc.Add("nm", searchString);
|
||||
}
|
||||
else // use the normal search
|
||||
{
|
||||
searchString = searchString.Replace("-", " ");
|
||||
if (query.Season != 0)
|
||||
{
|
||||
searchString += " Сезон " + query.Season;
|
||||
}
|
||||
qc.Add("nm", searchString);
|
||||
}
|
||||
|
||||
|
@ -259,81 +253,63 @@ namespace Jackett.Common.Indexers
|
|||
|
||||
var searchUrl = SearchUrl + "?" + qc.GetQueryString();
|
||||
var results = await RequestWithCookiesAsync(searchUrl);
|
||||
|
||||
if (!results.ContentString.Contains("logout=true"))
|
||||
{
|
||||
// re login
|
||||
await ApplyConfiguration(null);
|
||||
results = await RequestWithCookiesAsync(searchUrl);
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
var RowsSelector = "table.forumline > tbody > tr[class*=prow]";
|
||||
var searchResultParser = new HtmlParser();
|
||||
var searchResultDocument = searchResultParser.ParseDocument(results.ContentString);
|
||||
var rows = searchResultDocument.QuerySelectorAll("table.forumline > tbody > tr[class*=\"prow\"]");
|
||||
|
||||
var SearchResultParser = new HtmlParser();
|
||||
var SearchResultDocument = SearchResultParser.ParseDocument(results.ContentString);
|
||||
var Rows = SearchResultDocument.QuerySelectorAll(RowsSelector);
|
||||
foreach (var Row in Rows)
|
||||
foreach (var row in rows)
|
||||
{
|
||||
try
|
||||
{
|
||||
var qDownloadLink = Row.QuerySelector("td:nth-child(6) > a");
|
||||
var qDownloadLink = row.QuerySelector("td:nth-child(6) > a");
|
||||
if (qDownloadLink == null) // Expects moderation
|
||||
continue;
|
||||
|
||||
var qDetailsLink = Row.QuerySelector("td:nth-child(3) > a");
|
||||
var qSize = Row.QuerySelector("td:nth-child(7)");
|
||||
var seedersStr = Row.QuerySelector("td:nth-child(10) > b").TextContent;
|
||||
var seeders = string.IsNullOrWhiteSpace(seedersStr) ? 0 : ParseUtil.CoerceInt(seedersStr);
|
||||
var timestr = Row.QuerySelector("td:nth-child(13)").TextContent;
|
||||
var forum = Row.QuerySelector("td:nth-child(2) > a");
|
||||
var forumid = forum.GetAttribute("href").Split('=')[1];
|
||||
var qDetailsLink = row.QuerySelector("td:nth-child(3) > a");
|
||||
var details = new Uri(SiteLink + qDetailsLink.GetAttribute("href"));
|
||||
var title = qDetailsLink.TextContent.Trim();
|
||||
var link = new Uri(SiteLink + qDownloadLink.GetAttribute("href"));
|
||||
var size = ReleaseInfo.GetBytes(qSize.TextContent);
|
||||
var leechers = ParseUtil.CoerceInt(Row.QuerySelector("td:nth-child(11) > b").TextContent);
|
||||
var publishDate = DateTimeUtil.FromFuzzyTime(timestr);
|
||||
var forumLink = row.QuerySelector("td:nth-child(2) > a").GetAttribute("href");
|
||||
var forumId = ParseUtil.GetArgumentFromQueryString(forumLink, "f");
|
||||
var category = MapTrackerCatToNewznab(forumId);
|
||||
var seedersStr = row.QuerySelector("td:nth-child(10) > b").TextContent;
|
||||
var seeders = string.IsNullOrWhiteSpace(seedersStr) ? 0 : ParseUtil.CoerceInt(seedersStr);
|
||||
var leechers = ParseUtil.CoerceInt(row.QuerySelector("td:nth-child(11) > b").TextContent);
|
||||
|
||||
var release = new ReleaseInfo
|
||||
{
|
||||
MinimumRatio = 1,
|
||||
MinimumSeedTime = 0,
|
||||
Title = qDetailsLink.TextContent,
|
||||
Guid = details,
|
||||
Details = details,
|
||||
Link = link,
|
||||
Guid = details,
|
||||
Size = size,
|
||||
Title = _titleParser.Parse(title, category, configData.StripCyrillicLetters.Value),
|
||||
Description = title,
|
||||
Category = category,
|
||||
Size = ReleaseInfo.GetBytes(row.QuerySelector("td:nth-child(7)").TextContent),
|
||||
Seeders = seeders,
|
||||
Peers = leechers + seeders,
|
||||
Grabs = 0, //ParseUtil.CoerceLong(Row.QuerySelector("td:nth-child(9)").TextContent);
|
||||
PublishDate = publishDate,
|
||||
Category = MapTrackerCatToNewznab(forumid),
|
||||
PublishDate = DateTimeUtil.FromFuzzyTime(row.QuerySelector("td:nth-child(13)").TextContent),
|
||||
DownloadVolumeFactor = 1,
|
||||
UploadVolumeFactor = 1
|
||||
UploadVolumeFactor = 1,
|
||||
MinimumRatio = 1,
|
||||
MinimumSeedTime = 0
|
||||
};
|
||||
|
||||
// TODO cleanup
|
||||
if (release.Category.Contains(TorznabCatType.TV.ID))
|
||||
{
|
||||
// extract season and episodes
|
||||
var regex = new Regex(".+\\/\\s([^а-яА-я\\/]+)\\s\\/.+Сезон\\s*[:]*\\s+(\\d+).+(?:Серії|Епізод)+\\s*[:]*\\s+(\\d+-*\\d*).+,\\s+(.+)\\]\\s(.+)");
|
||||
var title = regex.Replace(release.Title, "$1 - S$2E$3 - rus $4 $5");
|
||||
title = Regex.Replace(title, "-Rip", "Rip", RegexOptions.IgnoreCase);
|
||||
title = Regex.Replace(title, "WEB-DLRip", "WEBDL", RegexOptions.IgnoreCase);
|
||||
title = Regex.Replace(title, "WEB-DL", "WEBDL", RegexOptions.IgnoreCase);
|
||||
title = Regex.Replace(title, "HDTVRip", "HDTV", RegexOptions.IgnoreCase);
|
||||
|
||||
release.Title = title;
|
||||
}
|
||||
else if (configData.StripCyrillicLetters.Value)
|
||||
{
|
||||
var regex = new Regex(@"(\([А-Яа-яіІєЄїЇ\W]+\))|(^[А-Яа-яіІєЄїЇ\W\d]+\/ )|([а-яА-ЯіІєЄїЇ \-]+,+)|([а-яА-ЯіІєЄїЇ]+)");
|
||||
release.Title = regex.Replace(release.Title, "");
|
||||
}
|
||||
|
||||
releases.Add(release);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
logger.Error(string.Format("{0}: Error while parsing row '{1}':\n\n{2}", Id, Row.OuterHtml, ex));
|
||||
logger.Error($"{Id}: Error while parsing row '{row.OuterHtml}':\n\n{ex}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -344,5 +320,112 @@ namespace Jackett.Common.Indexers
|
|||
|
||||
return releases;
|
||||
}
|
||||
|
||||
public class TitleParser
|
||||
{
|
||||
private static readonly List<Regex> _FindTagsInTitlesRegexList = new List<Regex>
|
||||
{
|
||||
new Regex(@"\((?>\((?<c>)|[^()]+|\)(?<-c>))*(?(c)(?!))\)"),
|
||||
new Regex(@"\[(?>\[(?<c>)|[^\[\]]+|\](?<-c>))*(?(c)(?!))\]")
|
||||
};
|
||||
|
||||
// Matches two numbers joined by a comma and preceded by a whitespace character (e.g. " 1,2").
// For TV titles Parse replaces the match with " $1-$2", i.e. the comma becomes a hyphen.
private readonly Regex _tvTitleCommaRegex = new Regex(@"\s(\d+),(\d+)", RegexOptions.Compiled);
|
||||
// Matches a run of "Х" characters (the Cyrillic letter, going by the field name; case-insensitive) that
// follows whitespace or '-' and is immediately followed by ')' or ']'.
// For TV titles Parse replaces the run with "XX" and keeps the surrounding characters ($1 and $2).
private readonly Regex _tvTitleCyrillicXRegex = new Regex(@"([\s-])Х+([\)\]])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
// Matches a season range written as "Сезон <n>-<m>", "Season <n>-<m>" or "Seasons <n>-<m>" (optional colons).
// Group 1 = the "<n>-<m>" range. Parse applies this before the single-season patterns and replaces the match with "S$1".
private readonly Regex _tvTitleMultipleSeasonsRegex = new Regex(@"(?:Сезон|Seasons?)\s*[:]*\s+(\d+-\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
// Matches "Сезон <n> ... Серії|Серія|Серій|Епізод <e>[-<e2>] з <t>" (Ukrainian season / episode keywords, "з" = of).
// Groups: 1 = season number, 2 = episode number or range, 3 = the character after "з".
// Parse replaces the match with "S$1E$2 of $3".
// Group 3 is a single character ([\w?] has no quantifier), so for a multi-digit total only the first digit is
// part of the match; the remaining digits stay in the title directly after the replacement text.
private readonly Regex _tvTitleUkrSeasonEpisodeOfRegex = new Regex(@"Сезон\s*[:]*\s+(\d+).+(?:Серії|Серія|Серій|Епізод)+\s*[:]*\s+(\d+(?:-\d+)?)\s*з\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
// Matches "Сезон <n> ... Серії|Серія|Серій|Епізод <e>[-<e2>]" without a trailing total.
// Groups: 1 = season number, 2 = episode number or range. Parse replaces the match with "S$1E$2".
private readonly Regex _tvTitleUkrSeasonEpisodeRegex = new Regex(@"Сезон\s*[:]*\s+(\d+).+(?:Серії|Серія|Серій|Епізод)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
// Matches a stand-alone "Сезон <n>" (optional colons after the keyword). Group 1 = season number.
// Parse replaces the match with "S$1". Only a single number is captured here; season ranges are
// rewritten earlier in Parse by _tvTitleMultipleSeasonsRegex.
private readonly Regex _tvTitleUkrSeasonRegex = new Regex(@"Сезон\s*[:]*\s+(\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
// Matches "Серії|Серія|Серій|Епізод <e>[-<e2>] з <t>" with no season in front.
// Groups: 1 = episode number or range, 2 = the single character after "з" ([\w?] has no quantifier).
// Parse replaces the match with "E$1 of $2".
private readonly Regex _tvTitleUkrEpisodeOfRegex = new Regex(@"(?:Серії|Серія|Серій|Епізод)+\s*[:]*\s+(\d+(?:-\d+)?)\s*з\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
// Matches a stand-alone "Серії|Серія|Серій|Епізод <e>[-<e2>]" (optional colons after the keyword).
// Group 1 = episode number or range. Parse replaces the match with "E$1".
private readonly Regex _tvTitleUkrEpisodeRegex = new Regex(@"(?:Серії|Серія|Серій|Епізод)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
// Matches "Season <n> ... Episode(s) <e>[-<e2>] of <t>" (case-insensitive, optional colons after the keywords).
// Groups: 1 = season number, 2 = episode number or range, 3 = the character after "of".
// Parse replaces the match with "S$1E$2 of $3".
// Group 3 is a single character ([\w?] has no quantifier), so for a multi-digit total only the first digit is
// part of the match; the remaining digits stay in the title directly after the replacement text.
private readonly Regex _tvTitleEngSeasonEpisodeOfRegex = new Regex(@"Season\s*[:]*\s+(\d+).+(?:Episodes?)+\s*[:]*\s+(\d+(?:-\d+)?)\s*of\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
// Matches "Season <n> ... Episode(s) <e>[-<e2>]" without a trailing total.
// Groups: 1 = season number, 2 = episode number or range. Parse replaces the match with "S$1E$2".
private readonly Regex _tvTitleEngSeasonEpisodeRegex = new Regex(@"Season\s*[:]*\s+(\d+).+(?:Episodes?)+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
// Matches a stand-alone "Season <n>" or "Season <n>-<m>" (optional colons after the keyword).
// Group 1 = season number or range. Parse replaces the match with "S$1".
private readonly Regex _tvTitleEngSeasonRegex = new Regex(@"Season\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
private readonly Regex _tvTitleEngEpisodeOfRegex = new Regex(@"(?:Episodes?)+\s*[:]*\s+(\d+(?:-\d+)?)\s*of\s*([\w?])", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
private readonly Regex _tvTitleEngEpisodeRegex = new Regex(@"(?:Episodes?)+\s*[:]+\s*[:]*\s+(\d+(?:-\d+)?)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
private readonly Regex _stripCyrillicRegex = new Regex(@"(\([\p{IsCyrillic}\W]+\))|(^[\p{IsCyrillic}\W\d]+\/ )|([\p{IsCyrillic} \-]+,+)|([\p{IsCyrillic}]+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
|
||||
|
||||
// Patterns used only by Parse. They are constructed once so that every call reuses the same
// instances instead of resolving ten pattern strings through the static Regex cache per title.
private static readonly Regex _parseAnyDashRegex = new Regex(@"\p{Pd}", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex _parseDashRipRegex = new Regex(@"\b-Rip\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex _parseHdtvRipRegex = new Regex(@"\bHDTVRip\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex _parseWebDashDlRipRegex = new Regex(@"\bWEB-DLRip\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex _parseWebDlRipRegex = new Regex(@"\bWEBDLRip\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex _parseWebDlRegex = new Regex(@"\bWEBDL\b", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex _parseSlashAfterOpenParenRegex = new Regex(@"\(\s*\/\s*", RegexOptions.Compiled);
private static readonly Regex _parseSlashBeforeCloseParenRegex = new Regex(@"\s*\/\s*\)", RegexOptions.Compiled);
private static readonly Regex _parseEmptyTagRegex = new Regex(@"[\[\(]\s*[\)\]]", RegexOptions.Compiled);
private static readonly Regex _parseWhitespaceRunRegex = new Regex(@"\s+");

/// <summary>
/// Cleans a raw tracker release title.
/// </summary>
/// <param name="title">Raw release title.</param>
/// <param name="category">Category ids of the release; season/episode rewriting only runs when one of them is a TV category.</param>
/// <param name="stripCyrillicLetters">When true, Cyrillic fragments are removed from the title.</param>
/// <returns>The cleaned title with whitespace runs collapsed and surrounding punctuation trimmed.</returns>
public string Parse(string title, ICollection<int> category, bool stripCyrillicLetters = true)
{
    // Normalise every Unicode dash punctuation character to an ASCII hyphen.
    // https://www.fileformat.info/info/unicode/category/Pd/list.htm
    title = _parseAnyDashRegex.Replace(title, "-");

    if (IsAnyTvCategory(category))
    {
        title = _tvTitleCommaRegex.Replace(title, " $1-$2");
        title = _tvTitleCyrillicXRegex.Replace(title, "$1XX$2");

        // special case for multiple seasons
        title = _tvTitleMultipleSeasonsRegex.Replace(title, "S$1");

        // Order matters: the most specific wording is rewritten before its shorter variants.
        title = _tvTitleUkrSeasonEpisodeOfRegex.Replace(title, "S$1E$2 of $3");
        title = _tvTitleUkrSeasonEpisodeRegex.Replace(title, "S$1E$2");
        title = _tvTitleUkrSeasonRegex.Replace(title, "S$1");
        title = _tvTitleUkrEpisodeOfRegex.Replace(title, "E$1 of $2");
        title = _tvTitleUkrEpisodeRegex.Replace(title, "E$1");

        title = _tvTitleEngSeasonEpisodeOfRegex.Replace(title, "S$1E$2 of $3");
        title = _tvTitleEngSeasonEpisodeRegex.Replace(title, "S$1E$2");
        title = _tvTitleEngSeasonRegex.Replace(title, "S$1");
        title = _tvTitleEngEpisodeOfRegex.Replace(title, "E$1 of $2");
        title = _tvTitleEngEpisodeRegex.Replace(title, "E$1");
    }

    if (stripCyrillicLetters)
    {
        title = _stripCyrillicRegex.Replace(title, string.Empty).Trim(' ', '-');
    }

    // Normalise the source/quality markers.
    title = _parseDashRipRegex.Replace(title, "Rip");
    title = _parseHdtvRipRegex.Replace(title, "HDTV");
    title = _parseWebDashDlRipRegex.Replace(title, "WEB-DL");
    title = _parseWebDlRipRegex.Replace(title, "WEB-DL");
    title = _parseWebDlRegex.Replace(title, "WEB-DL");

    title = MoveFirstTagsToEndOfReleaseTitle(title);

    // Drop a slash left dangling just inside a parenthesis.
    title = _parseSlashAfterOpenParenRegex.Replace(title, "(");
    title = _parseSlashBeforeCloseParenRegex.Replace(title, ")");

    // Drop bracket pairs that ended up empty.
    title = _parseEmptyTagRegex.Replace(title, "");

    title = title.Trim(' ', '&', ',', '.', '!', '?', '+', '-', '_', '|', '/', '\\', ':');

    // replace multiple spaces with a single space
    title = _parseWhitespaceRunRegex.Replace(title, " ");

    return title.Trim();
}
|
||||
|
||||
/// <summary>
/// Tells whether <paramref name="category"/> contains the TV category id or the id of any of its sub-categories.
/// </summary>
private static bool IsAnyTvCategory(ICollection<int> category)
{
    if (category.Contains(TorznabCatType.TV.ID))
    {
        return true;
    }

    foreach (var tvSubCategory in TorznabCatType.TV.SubCategories)
    {
        if (category.Contains(tvSubCategory.ID))
        {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Moves the tags that open the title to its end, keeping their relative order.
/// Runs once per pattern in <c>_FindTagsInTitlesRegexList</c>, each pass working on the result of the previous one.
/// </summary>
/// <param name="input">Title to rearrange.</param>
/// <returns>The rearranged title, trimmed of surrounding whitespace.</returns>
private static string MoveFirstTagsToEndOfReleaseTitle(string input)
{
    var output = input;

    foreach (var findTagsRegex in _FindTagsInTitlesRegexList)
    {
        // Collect the run of tags at the start of the title. All offsets refer to the same,
        // unmodified string, so they stay valid however many tags are found.
        var leadingTags = new List<string>();
        var leadingEnd = 0;

        foreach (Match match in findTagsRegex.Matches(output))
        {
            // Only whitespace may sit between the start of the title, or the previous tag, and this one.
            var gap = output.Substring(leadingEnd, match.Index - leadingEnd);
            if (!string.IsNullOrWhiteSpace(gap))
            {
                break;
            }

            leadingTags.Add(match.Value);
            leadingEnd = match.Index + match.Length;
        }

        if (leadingTags.Count > 0)
        {
            // Cut the whole leading run off in one step and append the tags after the remaining title.
            output = $"{output.Substring(leadingEnd)} {string.Join(" ", leadingTags)}".Trim();
        }
    }

    return output.Trim();
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using Jackett.Common.Models;
|
||||
using NUnit.Framework;
|
||||
|
||||
namespace Jackett.Test.Common.Indexers.RuTracker
|
||||
{
|
||||
/// <summary>
/// Data-driven tests for the RuTracker title parser.
/// </summary>
[TestFixture]
public class RuTrackerTests
{
    /// <summary>
    /// Feeds one case from <see cref="TitleParserTestData.TestCases"/> to a fresh parser;
    /// NUnit compares the returned title with the value declared on the case.
    /// </summary>
    [TestCaseSource(typeof(TitleParserTestData), nameof(TitleParserTestData.TestCases))]
    public string TestTitleParsing(string title, ICollection<int> category, bool stripCyrillicLetters, bool moveAllTagsToEndOfReleaseTitle, bool moveFirstTagsToEndOfReleaseTitle) =>
        new Jackett.Common.Indexers.RuTracker.TitleParser().Parse(
            title,
            category,
            stripCyrillicLetters,
            moveAllTagsToEndOfReleaseTitle,
            moveFirstTagsToEndOfReleaseTitle);
}
|
||||
|
||||
/// <summary>
/// Test cases for the RuTracker title parser: raw title, category, the three parser switches and the expected result.
/// </summary>
public class TitleParserTestData
{
    // Builds one case: the single category id is wrapped in a list and the expected title is attached as the return value.
    private static TestCaseData Case(string title, int categoryId, bool stripCyrillicLetters, bool moveAllTags, bool moveFirstTags, string expected)
    {
        var testCase = new TestCaseData(title, new List<int> { categoryId }, stripCyrillicLetters, moveAllTags, moveFirstTags);
        return testCase.Returns(expected);
    }

    public static IEnumerable TestCases
    {
        get
        {
            // Nothing stripped, no tags moved.
            yield return Case(
                "Терапия / Shrinking / Сезон: 1 / серии: 1-2 из 10 (Джеймс Понсольдт) [2023, США, комедия, WEB-DLRip] Dub (Iyuno-SDI Group) + Original + Sub Rus",
                TorznabCatType.TVSD.ID, false, false, false,
                "Терапия / Shrinking / S1E1-2 of 10 (Джеймс Понсольдт) [2023, США, комедия, WEB-DL] Dub (Iyuno-SDI Group) + Original + Sub Rus");
            yield return Case(
                "Новичок / Новобранец / The Rookie / сезон: 5 / Серии: 1-14 из ?? (Майкл Гои, Билл Роу) [2022, США, боевик, драма, криминал, WEB-DLRip] MVO (LostFilm) + Original",
                TorznabCatType.TVForeign.ID, false, false, false,
                "Новичок / Новобранец / The Rookie / S5E1-14 of ?? (Майкл Гои, Билл Роу) [2022, США, боевик, драма, криминал, WEB-DL] MVO (LostFilm) + Original");
            yield return Case(
                "Красный яр / Сезон: 1-8 (Михаил Вассербаум) [2022, детектив, WEBRip-AVC]",
                TorznabCatType.TVOther.ID, false, false, false,
                "Красный яр / S1-8 (Михаил Вассербаум) [2022, детектив, WEBRip-AVC]");
            yield return Case(
                "Просто Михалыч / Эпизод: 1-5 из ХХ (Евгений Корчагин) [2022, комедия, WEBRip 720p]",
                TorznabCatType.TVHD.ID, false, false, false,
                "Просто Михалыч / E1-5 of XX (Евгений Корчагин) [2022, комедия, WEBRip 720p]");
            yield return Case(
                "Открывай, полиция! / Выпуски: 1,2 (Сергей Гинзбург) [2022, комедия, WEBRip]",
                TorznabCatType.TV.ID, false, false, false,
                "Открывай, полиция! / E1-2 (Сергей Гинзбург) [2022, комедия, WEBRip]");

            // Cyrillic stripped, no tags moved.
            yield return Case(
                "Терапия / Shrinking / Сезон: 1 / серии: 1-2 из 10 (Джеймс Понсольдт) [2023, США, комедия, WEB-DLRip] Dub (Iyuno-SDI Group) + Original + Sub Rus",
                TorznabCatType.TVHD.ID, true, false, false,
                "Shrinking / S1E1-2 of 10 [2023, WEB-DL] Dub (Iyuno-SDI Group) + Original + Sub Rus");
            yield return Case(
                "Новичок / Новобранец / The Rookie / сезон: 5 / Серии: 1-14 из ?? (Майкл Гои, Билл Роу) [2022, США, боевик, драма, криминал, WEB-DLRip] MVO (LostFilm) + Original",
                TorznabCatType.TVForeign.ID, true, false, false,
                "The Rookie / S5E1-14 of ?? [2022, WEB-DL] MVO (LostFilm) + Original");
            yield return Case(
                "Красный яр / Сезон: 1-8 (Михаил Вассербаум) [2022, детектив, WEBRip-AVC]",
                TorznabCatType.TVOther.ID, true, false, false,
                "S1-8 [2022, WEBRip-AVC]");
            yield return Case(
                "Просто Михалыч / Эпизод: 1-5 из ХХ (Евгений Корчагин) [2022, комедия, WEBRip 720p]",
                TorznabCatType.TVHD.ID, true, false, false,
                "E1-5 of XX [2022, WEBRip 720p]");
            yield return Case(
                "Открывай, полиция! / Выпуски: 1,2 (Сергей Гинзбург) [2022, комедия, WEBRip]",
                TorznabCatType.TV.ID, true, false, false,
                "E1-2 [2022, WEBRip]");

            // Cyrillic stripped, first tags moved.
            yield return Case(
                "Терапия / Shrinking / Сезон: 1 / Серии: 1-2 из 10 (Джеймс Понсольдт) [2023, США, комедия, WEB-DLRip] Dub (Iyuno-SDI Group) + Original + Sub Rus",
                TorznabCatType.TVUHD.ID, true, false, true,
                "Shrinking / S1E1-2 of 10 [2023, WEB-DL] Dub (Iyuno-SDI Group) + Original + Sub Rus");
            yield return Case(
                "Новичок / Новобранец / The Rookie / Сезон: 5 / Серии: 1-14 из ?? (Майкл Гои, Билл Роу) [2022, США, боевик, драма, криминал, WEB-DLRip] MVO (LostFilm) + Original",
                TorznabCatType.TVSport.ID, true, false, true,
                "The Rookie / S5E1-14 of ?? [2022, WEB-DL] MVO (LostFilm) + Original");

            // Cyrillic stripped, all tags moved.
            yield return Case(
                "Терапия / Shrinking / Сезон: 1 / Серии: 1-2 из 10 (Джеймс Понсольдт) [2023, США, комедия, WEB-DLRip] Dub (Iyuno-SDI Group) + Original + Sub Rus",
                TorznabCatType.TVAnime.ID, true, true, false,
                "Shrinking / S1E1-2 of 10 Dub + Original + Sub Rus (Iyuno-SDI Group) [2023, WEB-DL]");
            yield return Case(
                "Новичок / Новобранец / The Rookie / Сезон: 5 / Серии: 1-14 из ХХ (Майкл Гои, Билл Роу) [2022, США, боевик, драма, криминал, WEB-DLRip] MVO (LostFilm) + Original",
                TorznabCatType.TVDocumentary.ID, true, true, false,
                "The Rookie / S5E1-14 of XX MVO + Original (LostFilm) [2022, WEB-DL]");

            // Cyrillic stripped, both tag switches set.
            yield return Case(
                "Терапия / Shrinking / Сезон: 1 / Серии: 1-2 из 10 (Джеймс Понсольдт) [2023, США, комедия, WEB-DLRip] Dub (Iyuno-SDI Group) + Original + Sub Rus",
                TorznabCatType.TVAnime.ID, true, true, true,
                "Shrinking / S1E1-2 of 10 Dub + Original + Sub Rus (Iyuno-SDI Group) [2023, WEB-DL]");
            yield return Case(
                "Новичок / Новобранец / The Rookie / Сезон: 5 / Серии: 1,14 из ?? (Майкл Гои, Билл Роу) [2022, США, боевик, драма, криминал, WEB-DLRip] MVO (LostFilm) + Original",
                TorznabCatType.TVDocumentary.ID, true, true, true,
                "The Rookie / S5E1-14 of ?? MVO + Original (LostFilm) [2022, WEB-DL]");

            // Cyrillic kept, all tags moved.
            yield return Case(
                "Терапия / Shrinking / Сезон: 1 / Серии: 1-2 из 10 (Джеймс Понсольдт) [2023, США, комедия, WEB-DLRip] Dub (Iyuno-SDI Group) + Original + Sub Rus",
                TorznabCatType.TVHD.ID, false, true, false,
                "Терапия / Shrinking / S1E1-2 of 10 Dub + Original + Sub Rus (Джеймс Понсольдт) (Iyuno-SDI Group) [2023, США, комедия, WEB-DL]");
            yield return Case(
                "Новичок / Новобранец / The Rookie / Сезон: 5 / Серии: 1,14 из ХХ (Майкл Гои, Билл Роу) [2022, США, боевик, драма, криминал, WEB-DLRip] MVO (LostFilm) + Original",
                TorznabCatType.TVForeign.ID, false, true, false,
                "Новичок / Новобранец / The Rookie / S5E1-14 of XX MVO + Original (Майкл Гои, Билл Роу) (LostFilm) [2022, США, боевик, драма, криминал, WEB-DL]");
        }
    }
}
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using Jackett.Common.Models;
|
||||
using NUnit.Framework;
|
||||
|
||||
namespace Jackett.Test.Common.Indexers.Toloka
|
||||
{
|
||||
/// <summary>
/// Data-driven tests for the Toloka title parser.
/// </summary>
[TestFixture]
public class TolokaTests
{
    /// <summary>
    /// Feeds one case from <see cref="TitleParserTestData.TestCases"/> to a fresh parser;
    /// NUnit compares the returned title with the value declared on the case.
    /// </summary>
    [TestCaseSource(typeof(TitleParserTestData), nameof(TitleParserTestData.TestCases))]
    public string TestTitleParsing(string title, ICollection<int> category, bool stripCyrillicLetters) =>
        new Jackett.Common.Indexers.Toloka.TitleParser().Parse(title, category, stripCyrillicLetters);
}
|
||||
|
||||
/// <summary>
/// Test cases for the Toloka title parser: raw title, category, the strip-Cyrillic switch and the expected result.
/// </summary>
public class TitleParserTestData
{
    // Builds one case: the single category id is wrapped in a list and the expected title is attached as the return value.
    private static TestCaseData Case(string title, int categoryId, bool stripCyrillicLetters, string expected)
    {
        var testCase = new TestCaseData(title, new List<int> { categoryId }, stripCyrillicLetters);
        return testCase.Returns(expected);
    }

    public static IEnumerable TestCases
    {
        get
        {
            // Cyrillic stripped.
            yield return Case(
                "Правдива терапія (Сезон 1, серії 1-2) / Shrinking (Season 1, episodes 1-2) (2023) WEBRip 1080p Ukr/Eng",
                TorznabCatType.TV.ID, true,
                "Shrinking (S1E1-2) (2023) WEBRip 1080p Ukr/Eng (S1E1-2)");
            yield return Case(
                "Ші-Ра та принцеси могутності (сезон 1-2, серій 14 з 20) / She-Ra and the Princesses of Power (seasons 1-2, episodes 14 of 20) (2018) WEBRip 1080p",
                TorznabCatType.TVHD.ID, true,
                "She-Ra and the Princesses of Power (S1-2, E14 of 20) (2018) WEBRip 1080p (S1-2, E14 of 20)");
            yield return Case(
                "А інші сгорять у пеклі (Сезон 1, Серія 3) / Everyone Else Burns (Season 1, Episode 3) (2023) WEB-DL 1080p Ukr/Eng | Sub Ukr/Eng",
                TorznabCatType.TVOther.ID, true,
                "Everyone Else Burns (S1E3) (2023) WEB-DL 1080p Ukr/Eng | Sub Ukr/Eng (S1E3)");
            yield return Case(
                "У тілі (Сезон 2, Епізод 1,2 з ХХ) / In the flesh (Season 2, episodes 1,2 of XX) (2014) 1080p BDRip Eng | sub Ukr",
                TorznabCatType.TVSport.ID, true,
                "In the flesh (S2E1-2 of XX) (2014) 1080p BDRip Eng | sub Ukr (S2E1-2 of XX)");

            // Cyrillic kept.
            yield return Case(
                "Правдива терапія (Сезон 1, серії 1-2) / Shrinking (Season 1, episodes 1-2) (2023) WEBRip 1080p Ukr/Eng",
                TorznabCatType.TVHD.ID, false,
                "Правдива терапія (S1E1-2) / Shrinking (S1E1-2) (2023) WEBRip 1080p Ukr/Eng");
            yield return Case(
                "Ші-Ра та принцеси могутності (сезон 1-2, серій 14 з 20) / She-Ra and the Princesses of Power (seasons 1-2, episodes 14 of 20) (2018) WEBRip 1080p",
                TorznabCatType.TVAnime.ID, false,
                "Ші-Ра та принцеси могутності (S1-2, E14 of 20) / She-Ra and the Princesses of Power (S1-2, E14 of 20) (2018) WEBRip 1080p");
            yield return Case(
                "А інші сгорять у пеклі (Сезон 1, Серія 3) / Everyone Else Burns (Season 1, Episode 3) (2023) WEB-DL 1080p Ukr/Eng | Sub Ukr/Eng",
                TorznabCatType.TVDocumentary.ID, false,
                "А інші сгорять у пеклі (S1E3) / Everyone Else Burns (S1E3) (2023) WEB-DL 1080p Ukr/Eng | Sub Ukr/Eng");
            yield return Case(
                "У тілі (Сезон 2, Епізод 1,2 з ХХ) / In the flesh (Season 2, episodes 1,2 of XX) (2014) 1080p BDRip Eng | sub Ukr",
                TorznabCatType.TV.ID, false,
                "У тілі (S2E1-2 of XX) / In the flesh (S2E1-2 of XX) (2014) 1080p BDRip Eng | sub Ukr");
        }
    }
}
|
||||
}
|
Loading…
Reference in New Issue