1
0
Fork 0
mirror of https://github.com/Jackett/Jackett synced 2025-01-01 12:46:23 +00:00

TvStore: refactor and update (#7978)

This commit is contained in:
Cory 2020-04-02 19:43:32 -05:00 committed by GitHub
parent 4dce8f61d9
commit e1c15f82d2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1,12 +1,11 @@
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Globalization;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using AngleSharp.Html.Parser;
using Jackett.Common.Models;
using Jackett.Common.Models.IndexerConfig.Bespoke;
using Jackett.Common.Services.Interfaces;
@ -19,75 +18,77 @@ namespace Jackett.Common.Indexers
{
public class TVstore : BaseWebIndexer
{
// Bidirectional mapping between the site's internal series id and the IMDb id,
// populated once from the browse page (see PopulateImdbMapAsync).
private readonly Dictionary<int, long> _imdbLookup = new Dictionary<int, long>(); // _imdbLookup[internalId] = imdbId
private readonly Dictionary<long, int>
_internalLookup = new Dictionary<long, int>(); // _internalLookup[imdbId] = internalId
// Matches the JavaScript arrays on the browse page that pair an internal series id
// (catl[n]=ID) with its IMDb number (catIM[ID]='nnnnnnn').
private readonly Regex _seriesInfoMatch = new Regex(
@"catl\[\d+\]=(?<seriesID>\d+).*catIM\[\k<seriesID>]='(?<ImdbId>\d+)'", RegexOptions.Compiled);
// Extracts a trailing "S01" / "S01E02" season/episode suffix from a search string.
private readonly Regex _seriesInfoSearchRegex = new Regex(
@"S(?<season>\d{1,3})(?:E(?<episode>\d{1,3}))?$", RegexOptions.IgnoreCase);
// Registers the indexer with Jackett (display name, site link, Torznab capabilities)
// and applies tracker-specific settings: encoding, language and category mappings.
public TVstore(IIndexerConfigurationService configService, WebClient wc, Logger l, IProtectionService ps) :
base("TV Store",
description: "TV Store is a HUNGARIAN Private Torrent Tracker for TV",
link: "https://tvstore.me/",
caps: new TorznabCapabilities
{
SupportsImdbTVSearch = true,
SupportsImdbMovieSearch = true // Needed for IMDb searches to work see #7977
},
configService: configService,
client: wc,
logger: l,
p: ps,
configData: new ConfigurationDataTVstore())
{
Encoding = Encoding.UTF8;
Language = "hu-hu"; // Hungarian-language tracker
Type = "private";
// Map the site's three categories onto Torznab TV categories.
AddCategoryMapping(1, TorznabCatType.TV);
AddCategoryMapping(2, TorznabCatType.TVHD);
AddCategoryMapping(3, TorznabCatType.TVSD);
}
// Endpoint URLs, all derived from the configured SiteLink.
private string LoginUrl => SiteLink + "takelogin.php";
private string LoginPageUrl => SiteLink + "login.php?returnto=%2F";
private string SearchUrl => SiteLink + "torrent/br_process.php";
private string DownloadUrl => SiteLink + "torrent/download.php";
private string BrowseUrl => SiteLink + "torrent/browse.php";
// Series metadata scraped from the browse page (internal id, names, IMDb id),
// cached so repeated queries do not re-fetch it.
private readonly List<SeriesDetail> series = new List<SeriesDetail>();
// Splits "Name S01E02"-style search strings into name, season and episode parts.
private readonly Regex _searchStringRegex = new Regex(@"(.+?)S0?(\d+)(E0?(\d+))?$", RegexOptions.IgnoreCase);
// Typed view over the base class' configuration object.
private new ConfigurationDataTVstore configData
{
get => (ConfigurationDataTVstore)base.configData;
set => base.configData = value;
}
// Registers the indexer with Jackett (display name, site link, capabilities)
// and applies tracker-specific settings: encoding, language and category mappings.
public TVstore(IIndexerConfigurationService configService, Utils.Clients.WebClient wc, Logger l, IProtectionService ps)
: base(name: "TVstore",
description: "TV Store is a HUNGARIAN Private Torrent Tracker for TV",
link: "https://tvstore.me/",
caps: new TorznabCapabilities(),
configService: configService,
client: wc,
logger: l,
p: ps,
configData: new ConfigurationDataTVstore())
{
Encoding = Encoding.UTF8;
Language = "hu-hu"; // Hungarian-language tracker
Type = "private";
TorznabCaps.SupportsImdbTVSearch = true;
// Map the site's three categories onto Torznab TV categories.
AddCategoryMapping(1, TorznabCatType.TV);
AddCategoryMapping(2, TorznabCatType.TVHD);
AddCategoryMapping(3, TorznabCatType.TVSD);
}
// Typed, read-only view over the base class' configuration object.
private new ConfigurationDataTVstore configData => (ConfigurationDataTVstore)base.configData;
public override async Task<IndexerConfigurationStatus> ApplyConfiguration(JToken configJson)
{
LoadValuesFromJson(configJson);
var loginPage = await RequestStringWithCookies(LoginPageUrl, string.Empty);
var pairs = new Dictionary<string, string> {
{ "username", configData.Username.Value },
{ "password", configData.Password.Value },
{ "back", "%2F" },
{ "logout", "1"}
var pairs = new Dictionary<string, string>
{
{"username", configData.Username.Value},
{"password", configData.Password.Value},
{"back", "%2F"},
{"logout", "1"}
};
var result = await RequestLoginAndFollowRedirect(LoginUrl, pairs, loginPage.Cookies, true, referer: SiteLink);
await ConfigureIfOK(result.Cookies, result.Content?.Contains("Főoldal") == true, () => throw new ExceptionWithConfigData(
$"Error while trying to login with: Username: {configData.Username.Value} Password: {configData.Password.Value}", configData));
await ConfigureIfOK(
result.Cookies, result.Content?.Contains("Főoldal") == true,
() => throw new ExceptionWithConfigData("Error while trying to login.", configData));
return IndexerConfigurationStatus.RequiresTesting;
}
/// <summary>
/// Calculate the Upload Factor for the torrents
/// Calculate the Upload Factor for the torrents
/// </summary>
/// <returns>The calculated factor</returns>
/// <param name="dateTime">Date time.</param>
/// <param name="type">Type of the torrent (SeasonPack/SingleEpisode).</param>
public double UploadFactorCalculator(DateTime dateTime, string type)
/// <param name="isSeasonPack">Determine if torrent type is season pack or single episode</param>
private static double UploadFactorCalculator(DateTime dateTime, bool isSeasonPack)
{
var today = DateTime.Now;
var dd = (today - dateTime).Days;
var dd = (DateTime.Now - dateTime).Days;
/* In case of season Packs */
if (type.Equals("season"))
if (isSeasonPack)
{
if (dd >= 90)
return 4;
@ -103,95 +104,99 @@ namespace Jackett.Common.Indexers
if (dd >= 30)
return 1.5;
}
return 1;
}
/// <summary>
/// Parses the torrents from the content
/// Parses the torrents from the content
/// </summary>
/// <returns>The parsed torrents.</returns>
/// <param name="results">The result of the query</param>
/// <param name="query">Query.</param>
/// <param name="already_found">Number of the already found torrents.(used for limit)</param>
/// <param name="alreadyFound">Number of the already found torrents.(used for limit)</param>
/// <param name="limit">The limit to the number of torrents to download </param>
private async Task<List<ReleaseInfo>> ParseTorrents(WebClientStringResult results, TorznabQuery query, int already_found, int limit, int previously_parsed_on_page)
/// <param name="previouslyParsedOnPage">Current position in parsed results</param>
private async Task<List<ReleaseInfo>> ParseTorrentsAsync(WebClientStringResult results, int alreadyFound, int limit,
int previouslyParsedOnPage)
{
var releases = new List<ReleaseInfo>();
var queryParams = new NameValueCollection
{
{"func", "getToggle"},
{"w", "F"},
{"pg", "0"}
};
try
{
var content = results.Content;
/* Content Looks like this
* 2\15\2\1\1727\207244\1x08 \[WebDL-720p - Eng - AJP69]\gb\2018-03-09 08:11:53\akció, kaland, sci-fi \0\0\1\191170047\1\0\Anonymous\50\0\0\\0\4\0\174\0\
* 1\ 0\0\1\1727\207243\1x08 \[WebDL-1080p - Eng - AJP69]\gb\2018-03-09 08:11:49\akció, kaland, sci-fi \0\0\1\305729738\1\0\Anonymous\50\0\0\\0\8\0\102\0\0\0\0\1\\\
* First 3 items per page are total results, results per page, and results this page
* There is also a tail of ~4 items after the results for some reason. Looks like \1\\\
*/
var parameters = content.Split(new string[] { "\\" }, StringSplitOptions.None);
var type = "normal";
/*
* Split the releases by '\' and go through them.
* 27 element belongs to one torrent
*/
for (var j = previously_parsed_on_page * 27; (j + 27 < parameters.Length && ((already_found + releases.Count) < limit)); j = j + 27)
var parameters = results.Content.Split('\\');
var torrentsThisPage = int.Parse(parameters[2]);
var maxTorrents = Math.Min(torrentsThisPage, limit - alreadyFound);
var rows = parameters.Skip(3) //Skip pages info
.Select((str, index) => (index, str)) //Index each string for grouping
.GroupBy(n => n.index / 27) // each torrent is divided into 27 parts
.Skip(previouslyParsedOnPage).Take(maxTorrents)// only parse the rows we want
//Convert above query into a List<string>(27) in prep for parsing
.Select(entry => entry.Select(item => item.str).ToList());
foreach (var row in rows)
{
var release = new ReleaseInfo();
var imdb_id = 4 + j;
var torrent_id = 5 + j;
var is_season_id = 6 + j;
var publish_date_id = 9 + j;
var files_id = 13 + j;
var size_id = 14 + j;
var seeders_id = 23;
var peers_id = 24 + j;
var grabs_id = 25 + j;
type = "normal";
//IMDB id of the series
var seriesinfo = series.Find(x => x.id.Contains(parameters[imdb_id]));
if (seriesinfo != null && !parameters[imdb_id].Equals(""))
release.Imdb = long.Parse(seriesinfo.imdbid);
//ID of the torrent
var unixTimestamp = (int)(DateTime.UtcNow.Subtract(new DateTime(1970, 1, 1))).TotalSeconds;
var fileinfoURL = SearchUrl + "?func=getToggle&id=" + parameters[torrent_id] + "&w=F&pg=0&now=" + unixTimestamp;
var fileinfo = (await RequestStringWithCookiesAndRetry(fileinfoURL)).Content;
release.Link = new Uri(DownloadUrl + "?id=" + parameters[torrent_id]);
release.Guid = release.Link;
release.Comments = release.Link;
var fileinf = fileinfo.Split(new string[] { "\\\\" }, StringSplitOptions.None);
if (fileinf.Length > 1)
var torrentId = row[(int)TorrentParts.TorrentId];
var downloadLink = new Uri(DownloadUrl + "?id=" + torrentId);
var imdbId = _imdbLookup.TryGetValue(int.Parse(row[(int)TorrentParts.InternalId]), out var imdb)
? (long?)imdb
: null;
var files = int.Parse(row[(int)TorrentParts.Files]);
var size = long.Parse(row[(int)TorrentParts.SizeBytes]);
var seeders = int.Parse(row[(int)TorrentParts.Seeders]);
var leechers = int.Parse(row[(int)TorrentParts.Leechers]);
var grabs = int.Parse(row[(int)TorrentParts.Grabs]);
var publishDate = DateTime.Parse(row[(int)TorrentParts.PublishDate]);
var isSeasonPack = row[(int)TorrentParts.EpisodeInfo].Contains("évad");
queryParams["id"] = torrentId;
queryParams["now"] = DateTimeUtil.DateTimeToUnixTimestamp(DateTime.UtcNow)
.ToString(CultureInfo.InvariantCulture);
var filesList = (await RequestStringWithCookiesAndRetry(SearchUrl + "?" + queryParams.GetQueryString()))
.Content;
var firstFileName = filesList.Split(
new[]
{
@"\\"
}, StringSplitOptions.None)[1];
// Delete the file extension. Many first files are either mkv or nfo.
// Cannot confirm these are the only extensions, so generic remove all 3 char extensions at end of section.
firstFileName = Regex.Replace(firstFileName, @"\.\w{3}$", string.Empty);
if (isSeasonPack)
firstFileName = Regex.Replace(
firstFileName, @"(?<=S\d+)E\d{2,3}", string.Empty, RegexOptions.IgnoreCase);
var category = new[]
{
release.Title = fileinf[1];
if (fileinf[1].Length > 5 && fileinf[1].Substring(fileinf[1].Length - 4).Contains("."))
release.Title = fileinf[1].Substring(0, fileinf[1].Length - 4);
}
// SeasonPack check
if (parameters[is_season_id].Contains("évad/"))
TvCategoryParser.ParseTvShowQuality(firstFileName)
};
var release = new ReleaseInfo
{
type = "season";
// If this is a season pack, remove the episode number from the title.
release.Title = Regex.Replace(release.Title, "s0?(\\d+)(e0?(\\d+))", "S$1", RegexOptions.IgnoreCase);
}
release.PublishDate = DateTime.Parse(parameters[publish_date_id], CultureInfo.InvariantCulture);
release.Files = int.Parse(parameters[files_id]);
release.Size = long.Parse(parameters[size_id]);
release.Seeders = int.Parse(parameters[seeders_id]);
release.Peers = (int.Parse(parameters[peers_id]) + release.Seeders);
release.Grabs = int.Parse(parameters[grabs_id]);
release.MinimumRatio = 1;
release.MinimumSeedTime = 172800; // 48 hours
release.DownloadVolumeFactor = 1;
release.UploadVolumeFactor = UploadFactorCalculator(release.PublishDate, type);
release.Category = new List<int> { TvCategoryParser.ParseTvShowQuality(release.Title) };
if ((already_found + releases.Count) < limit)
releases.Add(release);
else
return releases;
Title = firstFileName,
Link = downloadLink,
Guid = downloadLink,
PublishDate = publishDate,
Files = files,
Size = size,
Category = category,
Seeders = seeders,
Peers = leechers + seeders,
Grabs = grabs,
MinimumRatio = 1,
MinimumSeedTime = 172800, // 48 hours
DownloadVolumeFactor = 1,
UploadVolumeFactor = UploadFactorCalculator(publishDate, isSeasonPack),
Imdb = imdbId
};
releases.Add(release);
}
}
catch (Exception ex)
{
@ -200,181 +205,119 @@ namespace Jackett.Common.Indexers
return releases;
}
/* Search is possible only based by Series ID.
* All known series ID is on main page, with their attributes. (ID, EngName, HunName, imdbid)
*/
/// <summary>
/// Get all series info known by site
/// These are:
/// - Series ID
/// - Hungarian name
/// - English name
/// - IMDB ID
/// Map internally used series info to its corresponding IMDB number.
/// Saves this data into 2 dictionaries for easy lookup from one value to the other
/// </summary>
/// <returns>The series info.</returns>
protected async Task<bool> GetSeriesInfo()
private async Task PopulateImdbMapAsync()
{
var result = await RequestStringWithCookiesAndRetry(BrowseUrl);
var parser = new HtmlParser();
var dom = parser.ParseDocument(result.Content);
var scripts = dom.QuerySelectorAll("script");
//TODO Linq
foreach (var script in scripts)
foreach (Match match in _seriesInfoMatch.Matches(result.Content))
{
if (script.TextContent.Contains("catsh=Array"))
{
//TODO no regex in pattern, investigate using string.Split instead?
var seriesKnowBySite = Regex.Split(script.TextContent, "catl");
//TODO consider converting to foreach
for (var i = 1; i < seriesKnowBySite.Length; i++)
{
var id = seriesKnowBySite[i];
var seriesElement = WebUtility.HtmlDecode(id).Split(';');
var hungarianName = seriesElement[1].Split('=')[1].Trim('\'').ToLower();
var englishName = seriesElement[2].Split('=')[1].Trim('\'').ToLower();
var seriesId = seriesElement[0].Split('=')[1].Trim('\'');
var imdbId = seriesElement[7].Split('=')[1].Trim('\'');
var seriesDetail = new SeriesDetail
{
HunName = hungarianName,
EngName = englishName,
id = seriesId,
imdbid = imdbId
};
series.Add(seriesDetail);
}
}
var internalId = int.Parse(match.Groups["seriesID"].Value);
var imdbId = long.Parse(match.Groups["ImdbId"].Value);
_imdbLookup[internalId] = imdbId;
_internalLookup[imdbId] = internalId;
}
return true;
}
protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
{
//TODO convert to initializer
var releases = new List<ReleaseInfo>();
// If series from sites are indexed then we don't need to reindex them.
if (series?.Any() != true)
await GetSeriesInfo();
var unixTimestamp = (int)(DateTime.UtcNow.Subtract(new DateTime(1970, 1, 1))).TotalSeconds;
WebClientStringResult results;
var searchString = "";
var exactSearchURL = "";
var page = 1;
SeriesDetail seriesinfo = null;
var base64coded = "";
var noimdbmatch = false;
var limit = query.Limit;
if (limit == 0)
limit = 100;
if (!_imdbLookup.Any())
await PopulateImdbMapAsync();
var queryParams = new NameValueCollection
{
{"now", DateTimeUtil.DateTimeToUnixTimestamp(DateTime.UtcNow).ToString(CultureInfo.InvariantCulture)},
{"p", "1"}
};
if (query.Limit == 0)
query.Limit = 100;
if (query.IsImdbQuery)
{
seriesinfo = series.Find(x => x.imdbid.Equals(query.ImdbIDShort));
if (seriesinfo != null && !query.ImdbIDShort.Equals(""))
{
var querrySeason = "";
if (query.Season != 0)
querrySeason = query.Season.ToString();
exactSearchURL = SearchUrl + "?s=" + querrySeason + "&e=" + query.Episode + "&g=" + seriesinfo.id + "&now=" + unixTimestamp.ToString();
}
if (!string.IsNullOrEmpty(query.ImdbIDShort) && _internalLookup.TryGetValue(
long.Parse(query.ImdbIDShort), out var internalId))
queryParams.Add("g", internalId.ToString());
else
{
// IMDB_ID was not found in the site database.
return releases;
}
return Enumerable.Empty<ReleaseInfo>();
}
if (!query.IsImdbQuery || noimdbmatch)
else
{
/* SearchString format is the following: Seriesname 1X09 */
if (query.SearchTerm != null && !query.SearchTerm.Equals(""))
queryParams.Add("g", "0");
if (!string.IsNullOrWhiteSpace(query.SearchTerm))
{
searchString += query.SanitizedSearchTerm;
// convert SnnEnn to nnxnn for dashboard searches
if (query.Season == 0 && (query.Episode == null || query.Episode.Equals("")))
var searchString = query.SanitizedSearchTerm;
if (query.Season == 0 && string.IsNullOrWhiteSpace(query.Episode))
{
var searchMatch = _searchStringRegex.Match(searchString);
//Jackett doesn't check for lowercase s00e00 so do it here.
var searchMatch = _seriesInfoSearchRegex.Match(searchString);
if (searchMatch.Success)
{
query.Season = int.Parse(searchMatch.Groups[2].Value);
query.Episode = searchMatch.Groups[4].Success ? string.Format("{0:00}", (int?)int.Parse(searchMatch.Groups[4].Value)) : null;
searchString = searchMatch.Groups[1].Value; // strip SnnEnn
query.Season = int.Parse(searchMatch.Groups["season"].Value);
query.Episode = searchMatch.Groups["episode"].Success
? $"{int.Parse(searchMatch.Groups["episode"].Value):00}"
: null;
query.SearchTerm = searchString.Remove(searchMatch.Index, searchMatch.Length).Trim(); // strip SnnEnn
}
}
if (query.Season != 0)
searchString += " " + query.Season.ToString();
if (query.Episode != null && !query.Episode.Equals(""))
searchString += string.Format("x{0:00}", int.Parse(query.Episode));
}
else
{
// if searchquery is empty this is a test, so shorten the response time
limit = 20;
}
else if (query.IsTest)
query.Limit = 20;
/* Search string must be converted to Base64 */
var plainTextBytes = System.Text.Encoding.UTF8.GetBytes(searchString);
base64coded = System.Convert.ToBase64String(plainTextBytes);
exactSearchURL = SearchUrl + "?gyors=" + base64coded + "&p=" + page + "&now=" + unixTimestamp.ToString();
// Search string must be converted to Base64
var plainTextBytes = Encoding.UTF8.GetBytes(query.SanitizedSearchTerm);
queryParams.Add("c", Convert.ToBase64String(plainTextBytes));
}
/*Start search*/
results = await RequestStringWithCookiesAndRetry(exactSearchURL);
if (query.Season != 0)
{
queryParams.Add("s", query.Season.ToString());
if (!string.IsNullOrWhiteSpace(query.Episode))
queryParams.Add("e", query.Episode);
}
/* Parse page Information from result */
var results = await RequestStringWithCookiesAndRetry(SearchUrl + "?" + queryParams.GetQueryString());
// Parse page Information from result
var content = results.Content;
var splits = content.Split('\\');
var max_found = int.Parse(splits[0]);
var torrent_per_page = int.Parse(splits[1]);
if (torrent_per_page == 0)
return releases;
var start_page = (query.Offset / torrent_per_page) + 1;
var previously_parsed_on_page = query.Offset - (start_page * torrent_per_page) + 1; //+1 because indexing start from 0
if (previously_parsed_on_page <= 0)
previously_parsed_on_page = query.Offset;
var pages = Math.Ceiling(max_found / (double)torrent_per_page);
/* First page content is already ready */
if (start_page == 1)
var totalFound = int.Parse(splits[0]);
var torrentPerPage = int.Parse(splits[1]);
if (totalFound == 0 || query.Offset > totalFound)
return Enumerable.Empty<ReleaseInfo>();
var startPage = query.Offset / torrentPerPage + 1;
var previouslyParsedOnPage = query.Offset % torrentPerPage;
var pages = totalFound / torrentPerPage + 1;
// First page content is already ready
if (startPage == 1)
{
releases.AddRange(await ParseTorrents(results, query, releases.Count, limit, previously_parsed_on_page));
previously_parsed_on_page = 0;
start_page++;
releases.AddRange(await ParseTorrentsAsync(results, releases.Count, query.Limit, previouslyParsedOnPage));
previouslyParsedOnPage = 0;
startPage++;
}
for (page = start_page; (page <= pages && releases.Count < limit); page++)
for (var page = startPage; page <= pages && releases.Count < query.Limit; page++)
{
if (query.IsImdbQuery && seriesinfo != null)
exactSearchURL = SearchUrl + "?s=" + query.Season + "&e=" + query.Episode + "&g=" + seriesinfo.id + "&p=" + page + "&now=" + unixTimestamp.ToString();
else
exactSearchURL = SearchUrl + "?gyors=" + base64coded + "&p=" + page + "&now=" + unixTimestamp.ToString();
results = await RequestStringWithCookiesAndRetry(exactSearchURL);
releases.AddRange(await ParseTorrents(results, query, releases.Count, limit, previously_parsed_on_page));
previously_parsed_on_page = 0;
queryParams["page"] = page.ToString();
results = await RequestStringWithCookiesAndRetry(SearchUrl + "?" + queryParams.GetQueryString());
releases.AddRange(await ParseTorrentsAsync(results, releases.Count, query.Limit, previouslyParsedOnPage));
previouslyParsedOnPage = 0;
}
return releases;
}
}
/// <summary>
/// Series metadata scraped from the tracker's browse page: the site-internal
/// series id, the Hungarian and English titles, and the IMDb id.
/// Member names keep their original (lower-case) spelling because callers
/// access them directly (e.g. series.Find(x => x.id.Contains(...))).
/// </summary>
public class SeriesDetail
{
    // Auto-properties instead of public mutable fields, per C# design guidelines;
    // object-initializer usage at the call sites is unaffected.
    public string id { get; set; }
    public string HunName { get; set; }
    public string EngName { get; set; }
    public string imdbid { get; set; }
}
// Field offsets within one torrent record returned by br_process.php.
// Each record is a run of 27 backslash-separated values (the parser groups
// the split content by index / 27 and indexes rows with these values).
private enum TorrentParts
{
InternalId = 1, // site-internal series id, key into _imdbLookup
TorrentId = 2, // id used for the download and file-list requests
EpisodeInfo = 3, // episode text; contains "évad" (Hungarian "season") for season packs
PublishDate = 6, // upload timestamp
Files = 10, // number of files in the torrent
SizeBytes = 11, // total size in bytes
Seeders = 20,
Leechers = 21,
Grabs = 22 // completed-download count
}
}
}