Add AniDUB, a Russian anime site. Resolves #5399 (#5490)

This commit is contained in:
MsKrypt1 2019-06-15 02:26:07 +03:00 committed by garfield69
parent 756161f1e7
commit 1a548d1c8c
3 changed files with 627 additions and 0 deletions

View File

@ -93,6 +93,7 @@ Developer note: The software implements the [Torznab](https://github.com/Sonarr/
### Supported Semi-Private Trackers
* 7tor
* Alein
* AniDUB
* ArenaBG
* CzTorrent
* Deildu

View File

@ -0,0 +1,610 @@
using AngleSharp.Dom;
using AngleSharp.Html.Parser;
using Jackett.Common.Models;
using Jackett.Common.Models.IndexerConfig.Bespoke;
using Jackett.Common.Services.Interfaces;
using Jackett.Common.Utils;
using Jackett.Common.Utils.Clients;
using Microsoft.AspNetCore.WebUtilities;
using Newtonsoft.Json.Linq;
using NLog;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace Jackett.Common.Indexers
{
internal class AniDub : BaseWebIndexer
{
// Matches a trailing "[X из Y]" block at the end of a title ("из" is Russian for "of").
// An optional " (...)" note may follow X. Group 1 captures X, group 2 captures Y.
private static readonly Regex EpisodeInfoRegex = new Regex(@"\[(.*?)(?: \(.*?\))? из (.*?)\]$", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// Matches "S<digits>E<optional digits>" in a search term; group 1 captures the season digits.
// The "E" is required by the pattern, so a bare "S01" does not match.
private static readonly Regex SeasonInfoQueryRegex = new Regex(@"S(\d+)(?:E\d*)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// Matches the season marker "TV-<digits>" in Latin or Cyrillic ("ТВ-") spelling; group 1 captures the digits.
private static readonly Regex SeasonInfoRegex = new Regex(@"(?:(?:TV-)|(?:ТВ-))(\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// Lazily constructed. Matches everything from the start of the title up to and including
// the first "/" and any whitespace after it; used by StripRussianTitle to drop that prefix.
private static readonly Lazy<Regex> StripRussianTitleRegex = new Lazy<Regex>(() => new Regex(@"^.*?\/\s*", RegexOptions.Compiled));
/// <summary>
/// Creates the AniDUB indexer: passes the site identity and injected services to the base
/// indexer, sets encoding/language/type and registers the tracker category mappings.
/// </summary>
/// <param name="configService">Indexer configuration service forwarded to the base class.</param>
/// <param name="wc">Web client forwarded to the base class.</param>
/// <param name="l">Logger forwarded to the base class.</param>
/// <param name="ps">Protection service forwarded to the base class.</param>
public AniDub(IIndexerConfigurationService configService, WebClient wc, Logger l, IProtectionService ps)
: base(name: "AniDUB",
description: "AniDUB Tracker is a semi-private russian tracker and release group for anime",
link: "https://tr.anidub.com/",
caps: new TorznabCapabilities(),
configService: configService,
client: wc,
logger: l,
p: ps,
configData: new ConfigurationDataAniDub())
{
Encoding = Encoding.UTF8;
Language = "ru-RU";
Type = "semi-private";
// Tracker category id -> Torznab category, with the tracker's own (Russian) category name.
// The ids are the same values that CategoriesMap associates with URL path prefixes.
AddCategoryMapping(2, TorznabCatType.TVAnime, "Аниме TV");
AddCategoryMapping(14, TorznabCatType.TVAnime, "Аниме TV / Законченные сериалы");
AddCategoryMapping(10, TorznabCatType.TVAnime, "Аниме TV / Аниме Ongoing");
AddCategoryMapping(11, TorznabCatType.TVAnime, "Аниме TV / Многосерийный сёнэн");
AddCategoryMapping(13, TorznabCatType.XXX, "18+");
AddCategoryMapping(15, TorznabCatType.BooksComics, "Манга");
AddCategoryMapping(16, TorznabCatType.Audio, "OST");
AddCategoryMapping(17, TorznabCatType.Audio, "Подкасты");
AddCategoryMapping(3, TorznabCatType.TVAnime, "Аниме Фильмы");
AddCategoryMapping(4, TorznabCatType.TVAnime, "Аниме OVA");
AddCategoryMapping(5, TorznabCatType.TVAnime, "Аниме OVA |- Аниме ONA");
AddCategoryMapping(9, TorznabCatType.TV, "Дорамы");
AddCategoryMapping(6, TorznabCatType.TV, "Дорамы / Японские Сериалы и Фильмы");
AddCategoryMapping(7, TorznabCatType.TV, "Дорамы / Корейские Сериалы и Фильмы");
AddCategoryMapping(8, TorznabCatType.TV, "Дорамы / Китайские Сериалы и Фильмы");
AddCategoryMapping(12, TorznabCatType.Other, "Аниме Ongoing Анонсы");
AddCategoryMapping(1, TorznabCatType.Other, "Новости проекта Anidub");
}
/// <summary>
/// Maps the leading URL path of a show page to the tracker category id
/// (the ids registered through AddCategoryMapping in the constructor).
/// More specific paths are listed before their parent sections.
/// </summary>
/// <remarks>
/// The dictionary is built once and shared between calls; treat it as read-only.
/// </remarks>
private static Dictionary<string, string> CategoriesMap { get; } = new Dictionary<string, string>
{
    { "/anime_tv/full", "14" },
    { "/anime_tv/anime_ongoing", "10" },
    { "/anime_tv/shonen", "11" },
    { "/anime_tv", "2" },
    { "/xxx", "13" },
    { "/manga", "15" },
    { "/ost", "16" },
    { "/podcast", "17" },
    { "/anime_movie", "3" },
    { "/anime_ova/anime_ona", "5" },
    { "/anime_ova", "4" },
    { "/dorama/japan_dorama", "6" },
    { "/dorama/korea_dorama", "7" },
    { "/dorama/china_dorama", "8" },
    { "/dorama", "9" },
    { "/anons_ongoing", "12" },
};
// Category list sent with a search when the query specifies no categories.
// NOTE(review): "0" presumably means "all categories" on the tracker — confirm.
private static ICollection<string> DefaultSearchCategories
{
    get { return new[] { "0" }; }
}
/// <summary>
/// Strongly typed accessor over the base indexer's <c>configData</c> field.
/// </summary>
private ConfigurationDataAniDub Configuration
{
    get => (ConfigurationDataAniDub)configData;
    set => configData = value;
}
/// <summary>
/// Login endpoint: <c>SiteLink</c> followed by "index.php"
/// (https://tr.anidub.com/index.php for the default site link).
/// </summary>
private string LoginUrl
{
    get { return $"{SiteLink}index.php"; }
}
/// <summary>
/// Search endpoint: <c>SiteLink</c> followed by "index.php?do=search"
/// (https://tr.anidub.com/index.php?do=search for the default site link).
/// </summary>
private string SearchUrl
{
    get { return $"{SiteLink}index.php?do=search"; }
}
/// <summary>
/// Loads the supplied configuration, logs in with the configured credentials and
/// stores the resulting cookies when the response shows a logged-in session.
/// </summary>
/// <param name="configJson">Configuration values to load before logging in.</param>
/// <returns><see cref="IndexerConfigurationStatus.Completed"/> when the login succeeded.</returns>
/// <exception cref="ExceptionWithConfigData">
/// Thrown (via the ConfigureIfOK error callback) when the response is not authorized.
/// Carries the site's error text, or a generic message when the page shows none.
/// </exception>
public override async Task<IndexerConfigurationStatus> ApplyConfiguration(JToken configJson)
{
    const string ErrorSelector = "#content .berror .berror_c";
    const string FallbackError = "Login failed: no error message was found on the login response page.";

    LoadValuesFromJson(configJson);

    var data = new Dictionary<string, string>
    {
        { "login_name", Configuration.Username.Value },
        { "login_password", Configuration.Password.Value },
        { "login", "submit" }
    };

    var result = await RequestLoginAndFollowRedirect(
        LoginUrl,
        data,
        CookieHeader,
        returnCookiesFromFirstCall: true
    );

    var authorized = IsAuthorized(result);
    string errorMessage = null;
    if (!authorized)
    {
        // Only parse the page when we actually need the error text.
        var parser = new HtmlParser();
        var document = await parser.ParseDocumentAsync(result.Content);

        // The error box may be absent (layout change, unexpected response); do not let
        // a null dereference hide the real problem from the user.
        errorMessage = document.QuerySelector(ErrorSelector)?.Text()?.Trim();
        if (string.IsNullOrWhiteSpace(errorMessage))
        {
            errorMessage = FallbackError;
        }
    }

    await ConfigureIfOK(result.Cookies, authorized, () =>
    {
        throw new ExceptionWithConfigData(errorMessage, Configuration);
    });

    return IndexerConfigurationStatus.Completed;
}
/// <summary>
/// Downloads the torrent behind <paramref name="link"/> after making sure the
/// session is still logged in.
/// </summary>
/// <param name="link">Download link of the release.</param>
/// <returns>The bytes returned by the base indexer's download.</returns>
public override async Task<byte[]> Download(Uri link)
{
    // Re-login first if the session has expired.
    await EnsureAuthorized();

    var payload = await base.Download(link);
    return payload;
}
/// <summary>
/// Entry point for queries: test queries and queries without a search term return the
/// latest releases from the front page; everything else goes through the site search.
/// </summary>
/// <param name="query">The Torznab query to execute.</param>
/// <returns>The releases found.</returns>
protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
{
    var wantsLatest = query.IsTest || query.SearchTerm.IsNullOrEmptyOrWhitespace();
    if (!wantsLatest)
    {
        return await PerformSearch(query);
    }

    // Nothing to search for: use the latest releases.
    return await FetchNewReleases();
}
/// <summary>
/// Requests the site root with the stored cookies and logs in again when the
/// response does not look like a logged-in page.
/// </summary>
private async Task EnsureAuthorized()
{
    var homePage = await RequestStringWithCookies(SiteLink);
    if (IsAuthorized(homePage))
    {
        return;
    }

    // Passing null re-runs the login without loading new configuration values.
    await ApplyConfiguration(null);
}
/// <summary>
/// Reads the site's front page and collects the releases of every show linked from it.
/// </summary>
/// <returns>All releases gathered from the linked show pages.</returns>
private async Task<List<ReleaseInfo>> FetchNewReleases()
{
    const string ReleaseLinksSelector = "#dle-content > .story > .story_h > .lcol > h2 > a";

    var response = await RequestStringWithCookies(SiteLink);
    var collected = new List<ReleaseInfo>();

    try
    {
        var document = await new HtmlParser().ParseDocumentAsync(response.Content);
        var showUrls = document
            .QuerySelectorAll(ReleaseLinksSelector)
            .Select(anchor => anchor.GetAttribute("href"));

        // Show pages are fetched one after another, in page order.
        foreach (var showUrl in showUrls)
        {
            var showReleases = await FetchShowReleases(showUrl);
            collected.AddRange(showReleases);
        }
    }
    catch (Exception ex)
    {
        OnParseError(response.Content, ex);
    }

    return collected;
}
/// <summary>
/// Fetches a show page and builds one <see cref="ReleaseInfo"/> per release block
/// found under "#tabs .torrent_c".
/// </summary>
/// <param name="url">Absolute URL of the show page.</param>
/// <returns>
/// The releases on the page; empty when the URL path maps to no known category.
/// </returns>
private async Task<List<ReleaseInfo>> FetchShowReleases(string url)
{
    const string ContentId = "dle-content";
    const string ReleasesSelector = "#tabs .torrent_c > div";

    var showReleases = new List<ReleaseInfo>();
    var showUri = new Uri(url);

    var mappedCategories = ParseCategories(showUri);
    if (mappedCategories == null)
    {
        // If no category then it should be a news topic
        // Doesn't happen often
        return showReleases;
    }

    var categories = mappedCategories.ToArray();
    var response = await RequestStringWithCookies(url);

    try
    {
        var document = await new HtmlParser().ParseDocumentAsync(response.Content);
        var content = document.GetElementById(ContentId);

        // Values shared by every release on the page.
        var publishDate = GetDateFromShowPage(url, content);
        var baseTitle = GetBaseTitle(categories, content);
        var bannerUrl = GetBannerUrl(url, content);

        foreach (var releaseNode in content.QuerySelectorAll(ReleasesSelector))
        {
            // No quality, one tab, seems like a buggy page: the release node itself
            // is the tab. Otherwise the tab is its first <div> child.
            var isTabItself = releaseNode.Children.Any(
                child => child.ClassName?.Contains("torrent_h") == true);
            var tabNode = isTabItself
                ? releaseNode
                : releaseNode.Children.First(
                    child => child.TagName.Equals("div", StringComparison.InvariantCultureIgnoreCase));

            var seeders = GetReleaseSeeders(tabNode);

            showReleases.Add(new ReleaseInfo
            {
                Title = BuildReleaseTitle(baseTitle, tabNode),
                Guid = new Uri(GetReleaseGuid(url, tabNode)),
                Comments = showUri,
                Link = GetReleaseLink(tabNode),
                PublishDate = publishDate,
                Category = categories,
                DownloadVolumeFactor = 0,
                UploadVolumeFactor = 0,
                Size = GetReleaseSize(tabNode),
                Grabs = GetReleaseGrabs(tabNode),
                Description = GetReleaseDescription(tabNode),
                Seeders = seeders,
                Peers = GetReleaseLeechers(tabNode) + seeders,
                BannerUrl = bannerUrl
            });
        }
    }
    catch (Exception ex)
    {
        OnParseError(response.Content, ex);
    }

    return showReleases;
}
/// <summary>
/// Builds a per-release identifier by adding the torrent id to the show URL as an
/// "id" query parameter, so different quality versions of one show stay distinct.
/// </summary>
/// <param name="url">URL of the show page.</param>
/// <param name="tabNode">Tab element of the release.</param>
/// <returns>The show URL with the "id" query parameter added.</returns>
private static string GetReleaseGuid(string url, IElement tabNode)
{
    var torrentId = GetTorrentId(tabNode);
    return QueryHelpers.AddQueryString(url, "id", torrentId);
}
/// <summary>
/// Reads the leecher count from the release tab.
/// </summary>
/// <param name="tabNode">Tab element of the release.</param>
/// <returns>
/// The parsed count, or 0 when the counter element is missing or is not a number.
/// </returns>
private static int GetReleaseLeechers(IElement tabNode)
{
    const string LeechersSelector = ".list.down > .li_swing_m";

    // A missing element is treated like unparsable text: fall back to 0 instead of throwing.
    var leechersStr = tabNode.QuerySelector(LeechersSelector)?.Text();

    // Scraped markup is machine data, so parse it culture-invariantly.
    return int.TryParse(leechersStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out var leechers)
        ? leechers
        : 0;
}
/// <summary>
/// Reads the seeder count from the release tab.
/// </summary>
/// <param name="tabNode">Tab element of the release.</param>
/// <returns>
/// The parsed count, or 0 when the counter element is missing or is not a number.
/// </returns>
private static int GetReleaseSeeders(IElement tabNode)
{
    const string SeedersSelector = ".list.down > .li_distribute_m";

    // A missing element is treated like unparsable text: fall back to 0 instead of throwing.
    var seedersStr = tabNode.QuerySelector(SeedersSelector)?.Text();

    // Scraped markup is machine data, so parse it culture-invariantly.
    return int.TryParse(seedersStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out var seeders)
        ? seeders
        : 0;
}
/// <summary>
/// Returns the trimmed text of the release's ".tech &gt; pre" element.
/// </summary>
/// <param name="tabNode">Tab element of the release.</param>
/// <returns>The trimmed text, or null when the element is not present.</returns>
private static string GetReleaseDescription(IElement tabNode)
{
    const string DescriptionSelector = ".tech > pre";

    var descriptionNode = tabNode.QuerySelector(DescriptionSelector);
    if (descriptionNode == null)
    {
        return null;
    }

    return descriptionNode.Text()?.Trim();
}
/// <summary>
/// Reads the download (grab) count from the release tab.
/// </summary>
/// <param name="tabNode">Tab element of the release.</param>
/// <returns>
/// The parsed count, or 0 when the counter element is missing or is not a number.
/// </returns>
private static long GetReleaseGrabs(IElement tabNode)
{
    const string GrabsSelector = ".list.down > .li_download_m";

    // A missing element is treated like unparsable text: fall back to 0 instead of throwing.
    var grabsStr = tabNode.QuerySelector(GrabsSelector)?.Text();

    // Scraped markup is machine data, so parse it culture-invariantly.
    return long.TryParse(grabsStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out var grabs)
        ? grabs
        : 0L;
}
/// <summary>
/// Reads the release size text from the tab and converts it to bytes with
/// <c>ReleaseInfo.GetBytes</c>.
/// </summary>
/// <param name="tabNode">Tab element of the release.</param>
/// <returns>The size in bytes, or 0 when the size element is missing or blank.</returns>
private static long GetReleaseSize(IElement tabNode)
{
    const string SizeSelector = ".list.down > .red";

    var sizeStr = tabNode.QuerySelector(SizeSelector)?.Text();
    if (string.IsNullOrWhiteSpace(sizeStr))
    {
        // No size shown on the page: report "unknown" as 0 rather than failing the whole page.
        return 0;
    }

    return ReleaseInfo.GetBytes(sizeStr);
}
/// <summary>
/// Builds the download URL of a release:
/// <c>SiteLink</c> + "engine/download.php?id=" + the torrent id of the tab.
/// </summary>
/// <param name="tabNode">Tab element of the release.</param>
/// <returns>The download URI.</returns>
private Uri GetReleaseLink(IElement tabNode)
{
    var downloadUrl = SiteLink + "engine/download.php?id=" + GetTorrentId(tabNode);
    return new Uri(downloadUrl);
}
/// <summary>
/// Extracts the torrent id from the tab element's id attribute, whose format is
/// "torrent_{id}_info".
/// </summary>
/// <param name="tabNode">Tab element of the release.</param>
/// <returns>The element id with "torrent_" and "_info" removed.</returns>
private static string GetTorrentId(IElement tabNode)
{
    const string prefix = "torrent_";
    const string suffix = "_info";

    var withoutPrefix = tabNode.Id.Replace(prefix, string.Empty);
    return withoutPrefix.Replace(suffix, string.Empty);
}
/// <summary>
/// Appends the quality label, taken from the tab's parent element, to the base title.
/// </summary>
/// <param name="baseTitle">Normalized show title.</param>
/// <param name="tabNode">Tab element of the release.</param>
/// <returns>
/// "<paramref name="baseTitle"/> [quality]", or the base title alone when no quality is known.
/// </returns>
private static string BuildReleaseTitle(string baseTitle, IElement tabNode)
{
    var quality = GetQuality(tabNode.ParentElement);
    return quality.IsNullOrEmptyOrWhitespace()
        ? baseTitle
        : $"{baseTitle} [{quality}]";
}
/// <summary>
/// Translates the id of a release element into a quality label.
/// </summary>
/// <param name="releaseNode">Release element whose id encodes the quality.</param>
/// <returns>
/// A known label for the recognised ids, the upper-cased id for anything else,
/// or null when the element has no usable id.
/// </returns>
private static string GetQuality(IElement releaseNode)
{
    var rawId = releaseNode.Id;

    // For some releases there's no block with quality
    if (rawId.IsNullOrEmptyOrWhitespace())
    {
        return null;
    }

    var quality = rawId.Trim();
    var key = quality.ToLowerInvariant();

    if (key == "tv720")
    {
        return "HDTV 720p";
    }
    if (key == "tv1080")
    {
        return "HDTV 1080p";
    }
    if (key == "bd720")
    {
        return "BDRip 720p";
    }
    if (key == "bd1080")
    {
        return "BDRip 1080p";
    }
    if (key == "hwp")
    {
        return "SDTV";
    }

    // Unknown id: expose it as-is, upper-cased.
    return quality.ToUpperInvariant();
}
/// <summary>
/// Reads the poster image URL from the show page.
/// </summary>
/// <param name="url">URL of the show page; used only in the warning message.</param>
/// <param name="content">Content element of the show page.</param>
/// <returns>
/// The absolute poster URL, or null (with a warning logged) when the image is missing
/// or its src is not an absolute URL.
/// </returns>
private Uri GetBannerUrl(string url, IElement content)
{
    // A page without a poster must not fail the whole page: a missing node yields a
    // null src, which falls through to the warning path below.
    var bannerSrc = content.QuerySelector(".poster_bg .poster img")?.GetAttribute("src");

    if (Uri.TryCreate(bannerSrc, UriKind.Absolute, out var bannerUrl))
    {
        return bannerUrl;
    }

    logger.Warn($"[AniDub] Banner URL couldn't be parsed on '{url}'. Banner node src: {bannerSrc}");
    return null;
}
/// <summary>
/// Reads the show title from "#news-title" and normalizes it: optionally strips the
/// Russian title, then rewrites chapter, season, episode and movie markers.
/// </summary>
/// <param name="categories">Torznab category ids of the show.</param>
/// <param name="content">Content element of the show page.</param>
/// <returns>The normalized, trimmed title.</returns>
private string GetBaseTitle(int[] categories, IElement content)
{
    var title = content.QuerySelector("#news-title").Text().Trim();
    title = FixBookInfo(StripRussianTitle(title));

    var isShow = categories.Contains(TorznabCatType.TVAnime.ID);
    if (!isShow)
    {
        // Just fix TV-\d to S\d and [\d+] to E\d
        title = FixEpisodeInfo(FixSeasonInfo(title));
    }
    else
    {
        title = FixShowTitle(title);
    }

    return FixMovieInfo(title).Trim();
}
/// <summary>
/// Normalizes a show title: removes the season marker and rewrites the trailing
/// episode block as "S{season:00}E01-E{last}".
/// </summary>
/// <param name="title">Show title as read from the page.</param>
/// <returns>The title with season and episode info merged into one marker.</returns>
private string FixShowTitle(string title)
{
    // Read the season before the marker is removed from the title.
    var season = GetSeasonNum(title);
    var withoutSeason = SeasonInfoRegex.Replace(title, string.Empty);

    // Normalize for parsing usages
    // Should look like S01E01-E09
    return EpisodeInfoRegex.Replace(withoutSeason, match =>
    {
        if (!match.Success)
        {
            return string.Empty;
        }

        return $"S{season:00}E01-E{match.Groups[1]}";
    });
}
/// <summary>
/// Extracts the season number from a "TV-N" / "ТВ-N" marker in the title.
/// </summary>
/// <param name="title">Show title as read from the page.</param>
/// <returns>The season number, or 1 when there is no marker or it cannot be parsed.</returns>
private int GetSeasonNum(string title)
{
    // First season is often skipped so return 1 if nothing matched
    const int defaultSeason = 1;
    // Capture group of SeasonInfoRegex that holds the digits. Kept separate from
    // defaultSeason: the two only happen to share the value 1.
    const int seasonGroupIndex = 1;

    var seasonMatch = SeasonInfoRegex.Match(title);
    if (!seasonMatch.Success)
    {
        return defaultSeason;
    }

    var seasonVal = seasonMatch.Groups[seasonGroupIndex].Value;
    return int.TryParse(seasonVal, NumberStyles.None, CultureInfo.InvariantCulture, out var seasonNum)
        ? seasonNum
        : defaultSeason;
}
/// <summary>
/// Removes the part of the title matched by StripRussianTitleRegex when the
/// "Strip Russian Title" option is enabled; otherwise returns the title unchanged.
/// </summary>
/// <param name="title">Title as read from the page.</param>
/// <returns>The possibly shortened title.</returns>
private string StripRussianTitle(string title)
{
    if (!Configuration.StripRussianTitle.Value)
    {
        return title;
    }

    return StripRussianTitleRegex.Value.Replace(title, string.Empty);
}
/// <summary>
/// Drops the word "Главы" (Russian for "chapters") from a "[Главы ...]" block,
/// leaving just "[...]".
/// </summary>
/// <param name="title">Title to normalize.</param>
/// <returns>The title with every "[Главы " replaced by "[".</returns>
private static string FixBookInfo(string title)
{
    const string chaptersPrefix = "[Главы ";
    const string bareBracket = "[";

    return title.Replace(chaptersPrefix, bareBracket);
}
/// <summary>
/// Rewrites the trailing episode block matched by EpisodeInfoRegex as "E01-E{last}",
/// where {last} is the first capture group of the match.
/// </summary>
/// <param name="title">Title to normalize.</param>
/// <returns>The title with the episode block rewritten.</returns>
private static string FixEpisodeInfo(string title)
{
    return EpisodeInfoRegex.Replace(title, match =>
    {
        if (!match.Success)
        {
            return string.Empty;
        }

        return $"E01-E{match.Groups[1]}";
    });
}
/// <summary>
/// Removes the " [Movie]" marker (including its leading space) from the title.
/// </summary>
/// <param name="title">Title to normalize.</param>
/// <returns>The title without any " [Movie]" occurrences.</returns>
private static string FixMovieInfo(string title)
{
    const string movieMarker = " [Movie]";

    return title.Replace(movieMarker, string.Empty);
}
/// <summary>
/// Rewrites every "TV-N" / "ТВ-N" season marker as "S{N:00}".
/// </summary>
/// <param name="title">Title to normalize.</param>
/// <returns>
/// The title with season markers rewritten. A marker whose digits do not fit an
/// <see cref="int"/> is left untouched instead of throwing.
/// </returns>
private static string FixSeasonInfo(string title)
{
    // The evaluator only runs for successful matches, so no Success check is needed.
    // \d+ can capture more digits than int holds (or non-ASCII digits), hence TryParse.
    return SeasonInfoRegex.Replace(
        title,
        match => int.TryParse(match.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out var season)
            ? $"S{season:00}"
            : match.Value
    );
}
/// <summary>
/// Parses the publish date shown on a show page and converts it to UTC.
/// </summary>
/// <param name="url">URL of the show page; used only in the warning message.</param>
/// <param name="content">Content element of the show page.</param>
/// <returns>
/// The publish date in UTC, or the current UTC time (with a warning logged) when the
/// date text cannot be parsed.
/// </returns>
private DateTime GetDateFromShowPage(string url, IElement content)
{
    const string dateFormat = "d-MM-yyyy";
    const string dateTimeFormat = dateFormat + ", HH:mm";

    // Read the text as a plain UTC wall-clock value and keep it as UTC; the site's
    // fixed offset is subtracted explicitly below. This avoids a round trip through
    // the server's local time zone.
    const DateTimeStyles style = DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal;
    var culture = CultureInfo.InvariantCulture;

    // Correct way but will not always work on cross-platform:
    //   TimeZoneInfo.FindSystemTimeZoneById("Russian Standard Time")
    // Russian Standard Time is +03:00, no DST
    const int russianStandardTimeDiff = 3;
    var nowLocal = DateTime.UtcNow.AddHours(russianStandardTimeDiff);

    // "Вчера" = yesterday, "Сегодня" = today. Format the substitutes with the same
    // invariant culture used for parsing; the current culture may use another
    // calendar and produce a year that then parses to the wrong date.
    var dateText = GetDateFromDocument(content)
        .Replace("Вчера", nowLocal.AddDays(-1).ToString(dateFormat, culture))
        .Replace("Сегодня", nowLocal.ToString(dateFormat, culture));

    if (DateTime.TryParseExact(dateText, dateTimeFormat, culture, style, out var siteTime))
    {
        // siteTime carries the site's wall-clock values; shift by the offset to get real UTC.
        return siteTime.AddHours(-russianStandardTimeDiff);
    }

    logger.Warn($"[AniDub] Date time couldn't be parsed on '{url}'. Date text: {dateText}");
    return DateTime.UtcNow;
}
/// <summary>
/// Returns the raw date text of a show page: the trailing text node of the second
/// item in the ".story_inf" list.
/// </summary>
/// <param name="content">Content element of the show page.</param>
/// <returns>
/// The trimmed text, or an empty string when the list item is missing or its last
/// child is not a text node.
/// </returns>
private static string GetDateFromDocument(IElement content)
{
    const string DateSelector = ".story_inf > li:nth-child(2)";

    // Guard the list item as well as its last child: a page without the info list
    // should yield "no date" rather than a null dereference.
    var domDate = content.QuerySelector(DateSelector)?.LastChild;
    if (domDate?.NodeName != "#text")
    {
        return string.Empty;
    }

    return domDate.NodeValue.Trim();
}
/// <summary>
/// Tells whether a response belongs to a logged-in session by looking for the
/// logout link in its content.
/// </summary>
/// <param name="result">Response to inspect.</param>
/// <returns>True when the content contains "index.php?action=logout".</returns>
private bool IsAuthorized(WebClientStringResult result)
{
    const string logoutMarker = "index.php?action=logout";

    return result.Content.Contains(logoutMarker);
}
/// <summary>
/// Maps a show URL to Torznab categories using the path prefixes in CategoriesMap.
/// </summary>
/// <param name="showUri">URI of the show page.</param>
/// <returns>
/// The categories of the most specific (longest) matching path prefix, or null when
/// no prefix matches.
/// </returns>
private IEnumerable<int> ParseCategories(Uri showUri)
{
    var path = showUri.AbsolutePath.ToLowerInvariant();

    // Pick the longest matching prefix explicitly so that "/anime_tv/full" wins over
    // "/anime_tv" regardless of dictionary enumeration order, which is unspecified.
    // URL paths are not linguistic text, so compare ordinally.
    var trackerCategory = CategoriesMap
        .Where(entry => path.StartsWith(entry.Key, StringComparison.Ordinal))
        .OrderByDescending(entry => entry.Key.Length)
        .Select(entry => entry.Value)
        .FirstOrDefault();

    if (trackerCategory == null)
    {
        return null;
    }

    return MapTrackerCatToNewznab(trackerCategory);
}
/// <summary>
/// Posts the query to the site search and collects the releases of every show in
/// the result list.
/// </summary>
/// <param name="query">The Torznab query to execute.</param>
/// <returns>All releases gathered from the matching show pages.</returns>
private async Task<List<ReleaseInfo>> PerformSearch(TorznabQuery query)
{
    const string searchLinkSelector = "#dle-content > .searchitem > h3 > a";

    var collected = new List<ReleaseInfo>();
    var postData = PreparePostData(query);
    var response = await PostDataWithCookies(SearchUrl, postData);

    try
    {
        var document = await new HtmlParser().ParseDocumentAsync(response.Content);
        var showUrls = document
            .QuerySelectorAll(searchLinkSelector)
            .Select(anchor => anchor.GetAttribute("href"));

        // Show pages are fetched one after another, in result order.
        foreach (var showUrl in showUrls)
        {
            var showReleases = await FetchShowReleases(showUrl);
            collected.AddRange(showReleases);
        }
    }
    catch (Exception ex)
    {
        OnParseError(response.Content, ex);
    }

    return collected;
}
/// <summary>
/// Builds the form fields of a search request: the fixed search options, the
/// normalized search text and one "catlist[]" field per category.
/// </summary>
/// <param name="query">The Torznab query to translate.</param>
/// <returns>The ordered list of form fields.</returns>
private List<KeyValuePair<string, string>> PreparePostData(TorznabQuery query)
{
    var fields = new List<KeyValuePair<string, string>>();

    // Small helper so each form field reads as one line.
    void Field(string name, string value) =>
        fields.Add(new KeyValuePair<string, string>(name, value));

    Field("do", "search");
    Field("subaction", "search");
    Field("search_start", "1");
    Field("full_search", "1");
    Field("result_from", "1");
    Field("story", NormalizeSearchQuery(query));
    Field("titleonly", "0");
    Field("searchuser", "");
    Field("replyless", "0");
    Field("replylimit", "0");
    Field("searchdate", "0");
    Field("beforeafter", "after");
    Field("sortby", "");
    Field("resorder", "desc");
    Field("showposts", "1");

    fields.AddRange(PrepareCategoriesQuery(query));
    return fields;
}
/// <summary>
/// Produces one "catlist[]" form field per tracker category to search in.
/// </summary>
/// <param name="query">The Torznab query whose categories are translated.</param>
/// <returns>
/// Fields for the query's mapped tracker categories, or for DefaultSearchCategories
/// when the query specifies none.
/// </returns>
private IEnumerable<KeyValuePair<string, string>> PrepareCategoriesQuery(TorznabQuery query)
{
    IEnumerable<string> trackerCategories;
    if (query.HasSpecifiedCategories)
    {
        trackerCategories = MapTorznabCapsToTrackers(query);
    }
    else
    {
        trackerCategories = DefaultSearchCategories;
    }

    return trackerCategories.Select(
        trackerCategory => new KeyValuePair<string, string>("catlist[]", trackerCategory)
    );
}
/// <summary>
/// Converts a Torznab search term into the form the tracker's search expects.
/// </summary>
/// <param name="query">The Torznab query providing the search term and season.</param>
/// <returns>
/// The lower-cased search text with "SxxEyy" markers rewritten as "TV-{season}",
/// the query's season (when set) appended as " TV-{season}", and spaces replaced by '+'.
/// </returns>
private static string NormalizeSearchQuery(TorznabQuery query)
{
    var searchQuery = query.SanitizedSearchTerm;

    // Convert S\dE\d to TV-{Season}
    // because of the convention on the tracker.
    // The digits come from user input, so an over-long number must not throw:
    // such a marker is left as typed.
    searchQuery = SeasonInfoQueryRegex.Replace(
        searchQuery,
        match => int.TryParse(match.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out var season)
            ? $"TV-{season}"
            : match.Value
    );

    if (query.Season > 0)
    {
        // Drop any "TV-N" already in the text and append the season from the query
        searchQuery = SeasonInfoRegex.Replace(searchQuery, string.Empty);
        searchQuery += $" TV-{query.Season}";
    }

    // Search is normalized with '+' instead of spaces
    return searchQuery.ToLowerInvariant().Replace(" ", "+");
}
}
}

View File

@ -0,0 +1,16 @@
namespace Jackett.Common.Models.IndexerConfig.Bespoke
{
/// <summary>
/// Configuration for the AniDUB indexer: the basic login settings inherited from
/// <see cref="ConfigurationDataBasicLogin"/> plus a "Strip Russian Title" switch.
/// </summary>
internal class ConfigurationDataAniDub : ConfigurationDataBasicLogin
{
    /// <summary>
    /// Switch named "Strip Russian Title"; enabled by default.
    /// </summary>
    public BoolItem StripRussianTitle { get; private set; }

    /// <summary>
    /// Creates the configuration with the title-stripping switch turned on.
    /// </summary>
    public ConfigurationDataAniDub()
    {
        var stripTitleOption = new BoolItem
        {
            Name = "Strip Russian Title",
            Value = true
        };

        StripRussianTitle = stripTitleOption;
    }
}
}