Jackett/src/Jackett/Indexers/LostFilm.cs

537 lines
22 KiB
C#
Raw Normal View History

using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Jackett.Models;
using Jackett.Models.IndexerConfig;
using Jackett.Utils;
using Jackett.Utils.Clients;
Feature/netcore preparation (#2035) * Move to use package reference for restoring nuget packages. * Return a task result for this async method. * Update to a supported version of the .NET Framework. This also has the side effect of allowing us to automatically generate our binding redirects on build. * Set the solution to target VS2017 * Update test solution csproj file to support being built through MSBuild 15 * Move to use package reference for restoring nuget packages. * Return a task result for this async method. * Update to a supported version of the .NET Framework. This also has the side effect of allowing us to automatically generate our binding redirects on build. * Set the solution to target VS2017 * Update test solution csproj file to support being built through MSBuild 15 * DateTimeRoutines does not have Nuget packages that support .NET Standard (and therefore .NET Core). We will have to include them for now until we can get rid of this dependency. * Move the interfaces into their own files. This will be useful when we share them between the .NET Core and .NET Framework WebAPI * Stage services that need to point to the new interface namespace. * Update CurlSharp to fix memory leak issue and support better runtime compatibility with OSX and Linux * Start spliting some interfaces into their own files - this will help by allowing us to split them out in the future into a seperate project so the actual implementations can stay within their respective architectures when required
2017-10-29 10:19:09 +00:00
using Jackett.Services.Interfaces;
using AngleSharp.Dom;
using AngleSharp.Parser.Html;
using Newtonsoft.Json.Linq;
using NLog;
namespace Jackett.Indexers
{
class LostFilm : BaseWebIndexer
{
private static Regex parsePlayEpisodeRegex = new Regex("PlayEpisode\\('(?<id>\\d+)','(?<season>\\d+)','(?<episode>\\d+)'\\)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static Regex parseReleaseDetailsRegex = new Regex("Видео:\\ (?<quality>.+).\\ Размер:\\ (?<size>.+).\\ Перевод", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// http://www.lostfilm.tv/login
string ApiUrl { get { return SiteLink + "ajaxik.php"; } }
// http://www.lostfilm.tv/new
string DiscoveryUrl { get { return SiteLink + "new"; } }
// http://www.lostfilm.tv/search?q=breaking+bad
string SearchUrl { get { return SiteLink + "search"; } }
// PlayEpisode function produce urls like this:
// https://www.lostfilm.tv/v_search.php?c=119&s=5&e=16
string ReleaseUrl { get { return SiteLink + "v_search.php"; } }
internal class TrackerUrlDetails
{
internal string seriesId { get; private set; }
internal string season { get; private set; }
internal string episode { get; private set; }
internal TrackerUrlDetails(string seriesId, string season, string episode)
{
this.seriesId = seriesId;
this.season = season;
this.episode = episode;
}
internal TrackerUrlDetails(IElement button)
{
var trigger = button.GetAttribute("onclick");
var match = parsePlayEpisodeRegex.Match(trigger);
seriesId = match.Groups["id"].Value;
season = match.Groups["season"].Value;
episode = match.Groups["episode"].Value;
}
internal string GetEpisodeString()
{
var result = string.Empty;
if (!string.IsNullOrEmpty(season) && season != "0" && season != "999")
{
result += "S" + season;
if (!string.IsNullOrEmpty(episode) && episode != "0" && episode != "999")
{
result += "E" + episode;
}
}
return result;
}
}
new ConfigurationDataBasicLogin configData
{
get { return (ConfigurationDataBasicLogin)base.configData; }
set { base.configData = value; }
}
public LostFilm(IIndexerConfigurationService configService, IWebClient wc, Logger l, IProtectionService ps)
: base(name: "LostFilm.tv",
description: "Unique portal about foreign series",
link: "https://www.lostfilm.tv/",
caps: TorznabUtil.CreateDefaultTorznabTVCaps(),
configService: configService,
client: wc,
logger: l,
p: ps,
// TODO: Provide optional instructions
configData: new ConfigurationDataBasicLogin())
{
Encoding = Encoding.UTF8;
Language = "ru-ru";
Type = "semi-private";
}
public override async Task<IndexerConfigurationStatus> ApplyConfiguration(JToken configJson)
{
logger.Info("Applying configuration");
LoadValuesFromJson(configJson);
var data = new Dictionary<string, string>
{
{ "act", "users" },
{ "type", "login" },
{ "mail", configData.Username.Value },
{ "pass", configData.Password.Value },
{ "rem", "1" }
};
var result = await RequestLoginAndFollowRedirect(ApiUrl, data, CookieHeader, true, SiteLink, ApiUrl, true);
await ConfigureIfOK(result.Cookies, result.Content != null && result.Content.Contains("\"success\":true"), () =>
{
var errorMessage = result.Content;
throw new ExceptionWithConfigData(errorMessage, configData);
});
return IndexerConfigurationStatus.RequiresTesting;
}
protected override async Task<IEnumerable<ReleaseInfo>> PerformQuery(TorznabQuery query)
{
logger.Info("PerformQuery: " + query.GetQueryString());
// If the search string is empty use the latest releases
if (query.IsTest || string.IsNullOrWhiteSpace(query.SearchTerm))
{
return await FetchNewReleases();
}
else
{
return await PerformSearch(query);
}
}
private async Task<WebClientStringResult> RequestStringAndRelogin(string url)
{
var results = await RequestStringWithCookies(url);
if (results.Content.Contains("503 Service"))
{
throw new ExceptionWithConfigData(results.Content, configData);
}
else if (results.Content.Contains("href=\"/login\""))
{
// Re-login
await ApplyConfiguration(null);
return await RequestStringWithCookies(url);
}
else
{
return results;
}
}
private async Task<List<ReleaseInfo>> PerformSearch(TorznabQuery query)
{
logger.Info("PerformSearch: " + query.SanitizedSearchTerm + " [" + query.QueryType + "]");
var releases = new List<ReleaseInfo>();
// If set to `true` search string will be reduced by word each time tracker returns zero results.
// It's required for "search" queries because tracker search API could only return series, not episodes.
// Set this variable to `false` within do/while loop when results were achieved.
var shouldMorphQuery = query.IsSearch;
// Search query words. Consists of Series keywords that will be used for series search request, and Episode keywords that will be used for episode filtering.
var keywords = query.SanitizedSearchTerm.Split(' ').ToList();
// Number of first keywords that relates to Series keywords.
int searchKeywords = keywords.Count;
do
{
var searchString = string.Join(" ", keywords.Take(searchKeywords));
var data = new Dictionary<string, string>
{
{ "act", "common" },
{ "type", "search" },
{ "val", searchString }
};
logger.Info("> Searching: " + searchString);
var response = await PostDataWithCookies(url: ApiUrl, data: data);
try
{
var json = JToken.Parse(response.Content);
var jsonData = json["data"];
// Protect from {"data":false,"result":"ok"}
if (jsonData.Type == JTokenType.Object)
{
var series = jsonData["series"];
if (series != null && series.HasValues)
{
logger.Info("> Found " + series.Count().ToString() + " series -> breaking the loop");
shouldMorphQuery = false;
foreach (var serie in series)
{
var link = serie["link"].ToString();
var season = query.Season == 0 ? "/seasons" : "/season_" + query.Season.ToString();
var url = SiteLink + link.TrimStart('/') + season;
if (!string.IsNullOrEmpty(query.Episode))
{
// TODO: Add a Quick Path via v_search.php for "tvsearch" queries
url += "/episode_" + query.Episode;
var taskReleases = await FetchEpisodeReleases(url);
releases.AddRange(taskReleases);
}
else
{
var filterKeywords = keywords.Skip(searchKeywords);
var filter = string.Join(" ", filterKeywords);
var taskReleases = await FetchSeriesReleases(url, query, filter);
releases.AddRange(taskReleases);
}
}
}
}
}
catch (Exception ex)
{
OnParseError(response.Content, ex);
}
if (shouldMorphQuery)
{
searchKeywords--;
}
} while (shouldMorphQuery && searchKeywords > 0);
return releases;
}
#region Page parsing
private async Task<List<ReleaseInfo>> FetchNewReleases()
{
var url = DiscoveryUrl;
logger.Info("FetchNewReleases: " + url);
var results = await RequestStringAndRelogin(url);
var releases = new List<ReleaseInfo>();
try
{
var parser = new HtmlParser();
var document = parser.Parse(results.Content);
var rows = document.QuerySelectorAll("div.row");
foreach (var row in rows)
{
var link = row.QuerySelector("a").GetAttribute("href");
var episodeUrl = SiteLink + link.TrimStart('/');
var comments = new Uri(episodeUrl);
var dateString = row.QuerySelector("div.beta:contains('Дата')").TextContent; // Release date: beta - ENG, alpha - RUS
dateString = dateString.Substring(dateString.IndexOf(":") + 2); // 'Дата выхода Eng: 13.10.2017' -> '13.10.2017'
var date = DateTime.Parse(dateString, new CultureInfo(Language)); // dd.mm.yyyy
var episodeReleases = await FetchEpisodeReleases(episodeUrl);
foreach (var release in episodeReleases)
{
release.Comments = comments;
release.PublishDate = date;
}
releases.AddRange(episodeReleases);
}
}
catch (Exception ex)
{
OnParseError(results.Content, ex);
}
return releases;
}
private async Task<List<ReleaseInfo>> FetchEpisodeReleases(string url)
{
logger.Info("FetchEpisodeReleases: " + url);
var results = await RequestStringAndRelogin(url);
var releases = new List<ReleaseInfo>();
try
{
var parser = new HtmlParser();
var document = parser.Parse(results.Content);
var playButton = document.QuerySelector("div.external-btn");
if (playButton != null)
{
var urlDetails = new TrackerUrlDetails(playButton);
releases = await FetchTrackerReleases(urlDetails);
}
}
catch (Exception ex)
{
OnParseError(results.Content, ex);
}
return releases;
}
// `detailsUrl` is a series details url provided in search JSON response.
private async Task<List<ReleaseInfo>> FetchSeriesReleases(string url, TorznabQuery query, string filter)
{
logger.Info("FetchSeriesReleases: " + url + " S: " + query.Season.ToString() + " E: " + query.Episode + " Filter: " + filter);
var releases = new List<ReleaseInfo>();
var results = await RequestStringWithCookies(url);
try
{
var parser = new HtmlParser();
var document = parser.Parse(results.Content);
var seasons = document.QuerySelectorAll("div.serie-block");
if (string.IsNullOrEmpty(query.Episode) || string.IsNullOrEmpty(filter))
{
var rows = seasons.SelectMany(s => s.QuerySelectorAll("table.movie-parts-list > tbody > tr"));
foreach (var row in rows)
{
var couldBreak = false; // Set to `true` if searched episode was found.
if (!string.IsNullOrEmpty(filter))
{
var titles = row.QuerySelector("td.gamma > div");
if (titles.TextContent.IndexOf(filter, StringComparison.OrdinalIgnoreCase) == -1)
{
continue;
}
}
var playButton = row.QuerySelector("td.zeta > div.external-btn");
if (!string.IsNullOrEmpty(query.Episode))
{
var match = parsePlayEpisodeRegex.Match(playButton.GetAttribute("onclick"));
var episode = match.Groups["episode"];
if (episode == null || episode.Value != query.Episode)
{
continue;
}
couldBreak = true;
}
var dateColumn = row.QuerySelector("td.delta"); // Contains both Date and EpisodeURL
var link = dateColumn.GetAttribute("onclick"); // goTo('/series/Prison_Break/season_5/episode_9/',false)
link = TrimString(link, '\'', '\'');
var episodeUrl = SiteLink + link.TrimStart('/');
var comments = new Uri(episodeUrl);
var dateString = dateColumn.QuerySelector("span").TextContent;
dateString = dateString.Substring(dateString.IndexOf(":") + 2); // 'Eng: 23.05.2017' -> '23.05.2017'
var date = DateTime.Parse(dateString, new CultureInfo(Language)); // dd.mm.yyyy
var urlDetails = new TrackerUrlDetails(playButton);
var episodeReleases = await FetchTrackerReleases(urlDetails);
foreach (var release in episodeReleases)
{
release.Comments = comments;
release.PublishDate = date;
}
releases.AddRange(episodeReleases);
if (couldBreak)
{
break;
}
}
}
else if (query.Season > 0)
{
// Query for the whole season release. Strange query in terms of typical requests but doable.
var buttons = seasons.SelectMany(s => s.QuerySelectorAll("div.movie-details-block > div.external-btn"));
foreach (var playButton in buttons)
{
var match = parsePlayEpisodeRegex.Match(playButton.GetAttribute("onclick"));
var season = match.Groups["season"];
if (season != null && season.Value == query.Season.ToString())
{
var urlDetails = new TrackerUrlDetails(playButton);
// TODO: Set PublishDate = season.lastEpisode.publishDate for season releases.
releases = await FetchTrackerReleases(urlDetails);
// Skip other seasons
break;
}
}
}
else
{
throw new ArgumentException("Impossible combination of arguments");
}
}
catch (Exception ex)
{
OnParseError(results.Content, ex);
}
return releases;
}
#endregion
#region Tracker parsing
private async Task<List<ReleaseInfo>> FetchTrackerReleases(TrackerUrlDetails details)
{
var queryCollection = new NameValueCollection();
queryCollection.Add("c", details.seriesId);
queryCollection.Add("s", details.season);
queryCollection.Add("e", string.IsNullOrEmpty(details.episode) ? "999" : details.episode); // 999 is a synonym for the whole serie
var url = ReleaseUrl + "?" + queryCollection.GetQueryString();
logger.Info("FetchTrackerReleases: " + url);
// Get redirection page with generated link on it. This link can't be constructed manually as it contains Hash field and hashing algo is unknown.
var results = await RequestStringWithCookies(url);
if (results.Content == "log in first")
{
throw new ExceptionWithConfigData(results.Content, configData);
}
try
{
var parser = new HtmlParser();
var document = parser.Parse(results.Content);
var meta = document.QuerySelector("meta");
var metaContent = meta.GetAttribute("content");
// Follow redirection defined by async url.replace
var redirectionUrl = metaContent.Substring(metaContent.IndexOf("http"));
return await FollowTrackerRedirection(redirectionUrl, details);
}
catch (Exception ex)
{
OnParseError(results.Content, ex);
}
// Failure path
return new List<ReleaseInfo>();
}
private async Task<List<ReleaseInfo>> FollowTrackerRedirection(string url, TrackerUrlDetails details)
{
logger.Info("FollowTrackerRedirection: " + url);
var results = await RequestStringWithCookies(url);
var releases = new List<ReleaseInfo>();
try
{
var parser = new HtmlParser();
var document = parser.Parse(results.Content);
var rows = document.QuerySelectorAll("div.inner-box--item");
logger.Info("> Parsing " + rows.Count().ToString() + " releases");
var serieTitle = document.QuerySelector("div.inner-box--subtitle").TextContent;
serieTitle = serieTitle.Substring(0, serieTitle.LastIndexOf(','));
var episodeInfo = document.QuerySelector("div.inner-box--text").TextContent;
var episodeName = TrimString(episodeInfo, '(', ')');
foreach (var row in rows)
{
try
{
var release = new ReleaseInfo();
release.Category = new int[] { TorznabCatType.TV.ID };
var detailsInfo = row.QuerySelector("div.inner-box--desc").TextContent;
var releaseDetails = parseReleaseDetailsRegex.Match(detailsInfo);
if (releaseDetails == null)
{
throw new FormatException("Failed to map release details string: " + detailsInfo);
}
// Ru title: downloadLink.TextContent.Replace("\n", "");
// En title should be manually constructed.
var titleComponents = new string[] {
serieTitle, details.GetEpisodeString(), episodeName, releaseDetails.Groups["quality"].Value
};
release.Title = string.Join(" - ", titleComponents.Where(s => !string.IsNullOrEmpty(s)));
var downloadLink = row.QuerySelector("div.inner-box--link > a");
release.Link = new Uri(downloadLink.GetAttribute("href"));
release.Guid = release.Link;
var sizeString = releaseDetails.Groups["size"].Value;
sizeString = sizeString.Replace("ТБ", "TB"); // untested
sizeString = sizeString.Replace("ГБ", "GB");
sizeString = sizeString.Replace("МБ", "MB");
sizeString = sizeString.Replace("КБ", "KB"); // untested
release.Size = ReleaseInfo.GetBytes(sizeString);
logger.Info("> Add: " + release.Title);
releases.Add(release);
}
catch (Exception ex)
{
logger.Error(string.Format("{0}: Error while parsing row '{1}':\n\n{2}", ID, row.OuterHtml, ex));
}
}
}
catch (Exception ex)
{
OnParseError(results.Content, ex);
}
return releases;
}
#endregion
#region Helpers
private string TrimString(string s, char startChar, char endChar)
{
var start = s.IndexOf(startChar);
var end = s.LastIndexOf(endChar);
return s.Substring(start + 1, end - start - 1);
}
#endregion
}
}